You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/09/23 05:42:47 UTC
[3/4] incubator-systemml git commit: [SYSTEMML-557] Memory efficiency frame block (array-based schema/meta)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index 005b254..67674fe 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -29,12 +29,11 @@ import java.io.Serializable;
import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
-import java.util.List;
import java.util.Map;
+import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.io.Writable;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.parser.Expression.ValueType;
@@ -61,29 +60,32 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
private int _numRows = -1;
/** The schema of the data frame as an ordered list of value types */
- private List<ValueType> _schema = null;
+ private ValueType[] _schema = null;
/** The column names of the data frame as an ordered list of strings */
- private List<String> _colnames = null;
+ private String[] _colnames = null;
- private List<ColumnMetadata> _colmeta = null;
+ private ColumnMetadata[] _colmeta = null;
/** The data frame data as an ordered list of columns */
- private List<Array> _coldata = null;
+ private Array[] _coldata = null;
/** Cache for recode maps from frame meta data, indexed by column 0-based */
private Map<Integer, SoftReference<HashMap<String,Long>>> _rcdMapCache = null;
public FrameBlock() {
_numRows = 0;
- _schema = new ArrayList<ValueType>();
- _colnames = new ArrayList<String>();
- _colmeta = new ArrayList<ColumnMetadata>();
- _coldata = new ArrayList<Array>();
if( REUSE_RECODE_MAPS )
_rcdMapCache = new HashMap<Integer, SoftReference<HashMap<String,Long>>>();
}
+ /**
+ * Copy constructor for frame blocks, which uses a shallow copy for
+ * the schema (column types and names) but a deep copy for meta data
+ * and actual column data.
+ *
+ * @param that
+ */
public FrameBlock(FrameBlock that) {
this(that.getSchema(), that.getColumnNames());
copy(that);
@@ -92,32 +94,32 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
public FrameBlock(int ncols, ValueType vt) {
this();
- _schema.addAll(Collections.nCopies(ncols, vt));
+ _schema = UtilFunctions.nCopies(ncols, vt);
_colnames = createColNames(ncols);
+ _colmeta = new ColumnMetadata[ncols];
for( int j=0; j<ncols; j++ )
- _colmeta.add(new ColumnMetadata(0));
+ _colmeta[j] = new ColumnMetadata(0);
}
- public FrameBlock(List<ValueType> schema) {
+ public FrameBlock(ValueType[] schema) {
this(schema, new String[0][]);
}
- public FrameBlock(List<ValueType> schema, List<String> names) {
+ public FrameBlock(ValueType[] schema, String[] names) {
this(schema, names, new String[0][]);
}
- public FrameBlock(List<ValueType> schema, String[][] data) {
- this(schema, createColNames(schema.size()), data);
+ public FrameBlock(ValueType[] schema, String[][] data) {
+ this(schema, createColNames(schema.length), data);
}
- public FrameBlock(List<ValueType> schema, List<String> names, String[][] data) {
+ public FrameBlock(ValueType[] schema, String[] names, String[][] data) {
_numRows = 0; //maintained on append
- _schema = new ArrayList<ValueType>(schema);
- _colnames = new ArrayList<String>(names);
- _colmeta = new ArrayList<ColumnMetadata>();
- for( int j=0; j<_schema.size(); j++ )
- _colmeta.add(new ColumnMetadata(0));
- _coldata = new ArrayList<Array>();
+ _schema = schema;
+ _colnames = names;
+ _colmeta = new ColumnMetadata[_schema.length];
+ for( int j=0; j<_schema.length; j++ )
+ _colmeta[j] = new ColumnMetadata(0);
for( int i=0; i<data.length; i++ )
appendRow(data[i]);
if( REUSE_RECODE_MAPS )
@@ -148,7 +150,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @return
*/
public int getNumColumns() {
- return _schema.size();
+ return _schema.length;
}
/**
@@ -156,7 +158,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*
* @return
*/
- public List<ValueType> getSchema() {
+ public ValueType[] getSchema() {
return _schema;
}
@@ -165,9 +167,9 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*
* @return
*/
- public void setSchema(List<ValueType> schema) {
+ public void setSchema(ValueType[] schema) {
_schema = schema;
- _colnames = createColNames(schema.size());
+ _colnames = createColNames(schema.length);
}
/**
@@ -175,7 +177,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*
* @return
*/
- public List<String> getColumnNames() {
+ public String[] getColumnNames() {
return _colnames;
}
@@ -183,7 +185,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*
* @param colnames
*/
- public void setColumnNames(List<String> colnames) {
+ public void setColumnNames(String[] colnames) {
_colnames = colnames;
}
@@ -191,7 +193,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*
* @return
*/
- public List<ColumnMetadata> getColumnMetadata() {
+ public ColumnMetadata[] getColumnMetadata() {
return _colmeta;
}
@@ -201,7 +203,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @return
*/
public ColumnMetadata getColumnMetadata(int c) {
- return _colmeta.get(c);
+ return _colmeta[c];
}
/**
@@ -221,16 +223,16 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @return
*/
public boolean isColumnMetadataDefault(int c) {
- return _colmeta.get(c).getMvValue() == null
- && _colmeta.get(c).getNumDistinct() == 0;
+ return _colmeta[c].getMvValue() == null
+ && _colmeta[c].getNumDistinct() == 0;
}
/**
*
* @param colmeta
*/
- public void setColumnMetadata(List<ColumnMetadata> colmeta) {
- _colmeta = new ArrayList<FrameBlock.ColumnMetadata>(colmeta);
+ public void setColumnMetadata(ColumnMetadata[] colmeta) {
+ System.arraycopy(colmeta, 0, _colmeta, 0, _colmeta.length);
}
/**
@@ -239,7 +241,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @param colmeta
*/
public void setColumnMetadata(int c, ColumnMetadata colmeta) {
- _colmeta.set(c, colmeta);
+ _colmeta[c] = colmeta;
}
/**
@@ -251,7 +253,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
public Map<String,Integer> getColumnNameIDMap() {
Map<String, Integer> ret = new HashMap<String, Integer>();
for( int j=0; j<getNumColumns(); j++ )
- ret.put(_colnames.get(j), j+1);
+ ret.put(_colnames[j], j+1);
return ret;
}
@@ -261,21 +263,24 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*/
public void ensureAllocatedColumns(int numRows) {
//early abort if already allocated
- if( _schema.size() == _coldata.size() )
+ if( _coldata != null && _schema.length == _coldata.length )
return;
- //allocate column meta data
- for( int j=0; j<_schema.size(); j++ )
- _colmeta.add(new ColumnMetadata(0));
+ //allocate column meta data if necessary
+ if( _colmeta == null || _schema.length != _colmeta.length ) {
+ _colmeta = new ColumnMetadata[_schema.length];
+ for( int j=0; j<_schema.length; j++ )
+ _colmeta[j] = new ColumnMetadata(0);
+ }
//allocate columns if necessary
- for( int j=0; j<_schema.size(); j++ ) {
- if( j >= _coldata.size() )
- switch( _schema.get(j) ) {
- case STRING: _coldata.add(new StringArray(new String[numRows])); break;
- case BOOLEAN: _coldata.add(new BooleanArray(new boolean[numRows])); break;
- case INT: _coldata.add(new LongArray(new long[numRows])); break;
- case DOUBLE: _coldata.add(new DoubleArray(new double[numRows])); break;
- default: throw new RuntimeException("Unsupported value type: "+_schema.get(j));
- }
+ _coldata = new Array[_schema.length];
+ for( int j=0; j<_schema.length; j++ ) {
+ switch( _schema[j] ) {
+ case STRING: _coldata[j] = new StringArray(new String[numRows]); break;
+ case BOOLEAN: _coldata[j] = new BooleanArray(new boolean[numRows]); break;
+ case INT: _coldata[j] = new LongArray(new long[numRows]); break;
+ case DOUBLE: _coldata[j] = new DoubleArray(new double[numRows]); break;
+ default: throw new RuntimeException("Unsupported value type: "+_schema[j]);
+ }
}
_numRows = numRows;
}
@@ -286,7 +291,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @param newlen
*/
public void ensureColumnCompatibility(int newlen) {
- if( _coldata.size() > 0 && _numRows != newlen )
+ if( _coldata!=null && _coldata.length > 0 && _numRows != newlen )
throw new RuntimeException("Mismatch in number of rows: "+newlen+" (expected: "+_numRows+")");
}
@@ -295,10 +300,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @param size
* @return
*/
- public static List<String> createColNames(int size) {
- ArrayList<String> ret = new ArrayList<String>(size);
+ public static String[] createColNames(int size) {
+ String[] ret = new String[size];
for( int i=1; i<=size; i++ )
- ret.add(createColName(i));
+ ret[i-1] = createColName(i);
return ret;
}
@@ -328,7 +333,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @return
*/
public boolean isColNameDefault(int i) {
- return _colnames.get(i).equals("C"+i);
+ return _colnames[i].equals("C"+(i+1));
}
/**
@@ -339,7 +344,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
int card = 0;
for( int i=0; i<getNumRows(); i++ )
card += (get(i, j) != null) ? 1 : 0;
- _colmeta.get(j).setNumDistinct(card);
+ _colmeta[j].setNumDistinct(card);
}
}
@@ -354,7 +359,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @return
*/
public Object get(int r, int c) {
- return _coldata.get(c).get(r);
+ return _coldata[c].get(r);
}
/**
@@ -366,7 +371,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @param val
*/
public void set(int r, int c, Object val) {
- _coldata.get(c).set(r, UtilFunctions.objectToObject(_schema.get(c), val));
+ _coldata[c].set(r, UtilFunctions.objectToObject(_schema[c], val));
}
/**
@@ -376,17 +381,17 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*/
public void reset(int nrow, boolean clearMeta) {
if( clearMeta ) {
- getSchema().clear();
- getColumnNames().clear();
+ _schema = null;
+ _colnames = null;
if( _colmeta != null ) {
- for( int i=0; i<_colmeta.size(); i++ )
+ for( int i=0; i<_colmeta.length; i++ )
if( !isColumnMetadataDefault(i) )
- _colmeta.set(i, new ColumnMetadata(0));
+ _colmeta[i] = new ColumnMetadata(0);
}
}
if(_coldata != null) {
- for( int i=0; i < _coldata.size(); i++ )
- _coldata.get(i)._size = nrow;
+ for( int i=0; i < _coldata.length; i++ )
+ _coldata[i]._size = nrow;
}
}
@@ -407,7 +412,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
public void appendRow(Object[] row) {
ensureAllocatedColumns(0);
for( int j=0; j<row.length; j++ )
- _coldata.get(j).append(row[j]);
+ _coldata[j].append(row[j]);
_numRows++;
}
@@ -420,7 +425,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
public void appendRow(String[] row) {
ensureAllocatedColumns(0);
for( int j=0; j<row.length; j++ )
- _coldata.get(j).append(row[j]);
+ _coldata[j].append(row[j]);
_numRows++;
}
@@ -433,9 +438,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*/
public void appendColumn(String[] col) {
ensureColumnCompatibility(col.length);
- _schema.add(ValueType.STRING);
- _colnames.add(createColName(_schema.size()));
- _coldata.add(new StringArray(col));
+ _schema = ArrayUtils.add(_schema, ValueType.STRING);
+ _colnames = ArrayUtils.add(_colnames, createColName(_schema.length));
+ _coldata = (_coldata==null) ? new Array[]{new StringArray(col)} :
+ ArrayUtils.add(_coldata, new StringArray(col));
_numRows = col.length;
}
@@ -448,9 +454,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*/
public void appendColumn(boolean[] col) {
ensureColumnCompatibility(col.length);
- _schema.add(ValueType.BOOLEAN);
- _colnames.add(createColName(_schema.size()));
- _coldata.add(new BooleanArray(col));
+ _schema = ArrayUtils.add(_schema, ValueType.BOOLEAN);
+ _colnames = ArrayUtils.add(_colnames, createColName(_schema.length));
+ _coldata = (_coldata==null) ? new Array[]{new BooleanArray(col)} :
+ ArrayUtils.add(_coldata, new BooleanArray(col));
_numRows = col.length;
}
@@ -463,9 +470,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*/
public void appendColumn(long[] col) {
ensureColumnCompatibility(col.length);
- _schema.add(ValueType.INT);
- _colnames.add(createColName(_schema.size()));
- _coldata.add(new LongArray(col));
+ _schema = ArrayUtils.add(_schema, ValueType.INT);
+ _colnames = ArrayUtils.add(_colnames, createColName(_schema.length));
+ _coldata = (_coldata==null) ? new Array[]{new LongArray(col)} :
+ ArrayUtils.add(_coldata, new LongArray(col));
_numRows = col.length;
}
@@ -478,9 +486,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
*/
public void appendColumn(double[] col) {
ensureColumnCompatibility(col.length);
- _schema.add(ValueType.DOUBLE);
- _colnames.add(createColName(_schema.size()));
- _coldata.add(new DoubleArray(col));
+ _schema = ArrayUtils.add(_schema, ValueType.DOUBLE);
+ _colnames = ArrayUtils.add(_colnames, createColName(_schema.length));
+ _coldata = (_coldata==null) ? new Array[]{new DoubleArray(col)} :
+ ArrayUtils.add(_coldata, new DoubleArray(col));
_numRows = col.length;
}
@@ -490,11 +499,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
* @return
*/
public Object getColumn(int c) {
- switch(_schema.get(c)) {
- case STRING: return ((StringArray)_coldata.get(c))._data;
- case BOOLEAN: return ((BooleanArray)_coldata.get(c))._data;
- case INT: return ((LongArray)_coldata.get(c))._data;
- case DOUBLE: return ((DoubleArray)_coldata.get(c))._data;
+ switch(_schema[c]) {
+ case STRING: return ((StringArray)_coldata[c])._data;
+ case BOOLEAN: return ((BooleanArray)_coldata[c])._data;
+ case INT: return ((LongArray)_coldata[c])._data;
+ case DOUBLE: return ((DoubleArray)_coldata[c])._data;
default: return null;
}
}
@@ -556,14 +565,14 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
out.writeBoolean(isDefaultMeta);
//write columns (value type, data)
for( int j=0; j<getNumColumns(); j++ ) {
- out.writeByte(_schema.get(j).ordinal());
+ out.writeByte(_schema[j].ordinal());
if( !isDefaultMeta ) {
- out.writeUTF(_colnames.get(j));
- out.writeLong(_colmeta.get(j).getNumDistinct());
- out.writeUTF( (_colmeta.get(j).getMvValue()!=null) ?
- _colmeta.get(j).getMvValue() : "" );
+ out.writeUTF(_colnames[j]);
+ out.writeLong(_colmeta[j].getNumDistinct());
+ out.writeUTF( (_colmeta[j].getMvValue()!=null) ?
+ _colmeta[j].getMvValue() : "" );
}
- _coldata.get(j).write(out);
+ _coldata[j].write(out);
}
}
@@ -573,10 +582,16 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
_numRows = in.readInt();
int numCols = in.readInt();
boolean isDefaultMeta = in.readBoolean();
+ //allocate schema/meta data arrays
+ _schema = (_schema!=null && _schema.length==numCols) ?
+ _schema : new ValueType[numCols];
+ _colnames = (_colnames != null && _colnames.length==numCols) ?
+ _colnames : new String[numCols];
+ _colmeta = (_colmeta != null && _colmeta.length==numCols) ?
+ _colmeta : new ColumnMetadata[numCols];
+ _coldata = (_coldata!=null && _coldata.length==numCols) ?
+ _coldata : new Array[numCols];
//read columns (value type, meta, data)
- _schema.clear();
- _colmeta.clear();
- _coldata.clear();
for( int j=0; j<numCols; j++ ) {
ValueType vt = ValueType.values()[in.readByte()];
String name = isDefaultMeta ? createColName(j) : in.readUTF();
@@ -591,11 +606,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
default: throw new IOException("Unsupported value type: "+vt);
}
arr.readFields(in);
- _schema.add(vt);
- _colnames.add(name);
- _colmeta.add(new ColumnMetadata(ndistinct,
- (mvvalue==null || mvvalue.isEmpty()) ? null : mvvalue));
- _coldata.add(arr);
+ _schema[j] = vt;
+ _colnames[j] = name;
+ _colmeta[j] = new ColumnMetadata(ndistinct,
+ (mvvalue==null || mvvalue.isEmpty()) ? null : mvvalue);
+ _coldata[j] = arr;
}
}
@@ -678,16 +693,17 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
if( ret == null )
ret = new FrameBlock();
ret._numRows = _numRows;
- ret._schema = new ArrayList<ValueType>(_schema);
- ret._colnames = new ArrayList<String>(_colnames);
- ret._colmeta = new ArrayList<ColumnMetadata>(_colmeta);
+ ret._schema = _schema.clone();
+ ret._colnames = _colnames.clone();
+ ret._colmeta = _colmeta.clone();
+ ret._coldata = new Array[getNumColumns()];
//copy data to output and partial overwrite w/ rhs
for( int j=0; j<getNumColumns(); j++ ) {
- Array tmp = _coldata.get(j).clone();
+ Array tmp = _coldata[j].clone();
if( j>=cl && j<=cu )
- tmp.set(rl, ru, rhsFrame._coldata.get(j-cl));
- ret._coldata.add(tmp);
+ tmp.set(rl, ru, rhsFrame._coldata[j-cl]);
+ ret._coldata[j] = tmp;
}
return ret;
@@ -737,20 +753,27 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
ret.reset(ru-rl+1, true);
//copy output schema and colnames
+ int numCols = cu-cl+1;
+ ret._schema = new ValueType[numCols];
+ ret._colnames = new String[numCols];
+ ret._colmeta = new ColumnMetadata[numCols];
+
for( int j=cl; j<=cu; j++ ) {
- ret._schema.add(_schema.get(j));
- ret._colnames.add(_colnames.get(j));
- ret._colmeta.add(_colmeta.get(j));
+ ret._schema[j-cl] = _schema[j];
+ ret._colnames[j-cl] = _colnames[j];
+ ret._colmeta[j-cl] = _colmeta[j];
}
ret._numRows = ru-rl+1;
//copy output data
- if(ret._coldata.size() == 0)
+ if(ret._coldata == null ) {
+ ret._coldata = new Array[numCols];
for( int j=cl; j<=cu; j++ )
- ret._coldata.add(_coldata.get(j).slice(rl,ru));
+ ret._coldata[j-cl] = _coldata[j].slice(rl,ru);
+ }
else
for( int j=cl; j<=cu; j++ )
- ret._coldata.get(j-cl).set(0, ru-rl, _coldata.get(j), rl);
+ ret._coldata[j-cl].set(0, ru-rl, _coldata[j], rl);
return ret;
}
@@ -818,18 +841,14 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
ret._numRows = _numRows;
//concatenate schemas (w/ deep copy to prevent side effects)
- ret._schema = new ArrayList<ValueType>(_schema);
- ret._schema.addAll(that._schema);
- ret._colnames = new ArrayList<String>(_colnames);
- ret._colnames.addAll(that._colnames);
- ret._colmeta = new ArrayList<ColumnMetadata>(_colmeta);
- ret._colmeta.addAll(that._colmeta);
+ ret._schema = ArrayUtils.addAll(_schema, that._schema);
+ ret._colnames = ArrayUtils.addAll(_colnames, that._colnames);
+ ret._colmeta = ArrayUtils.addAll(_colmeta, that._colmeta);
//concatenate column data (w/ deep copy to prevent side effects)
- for( Array tmp : _coldata )
- ret._coldata.add(tmp.clone());
- for( Array tmp : that._coldata )
- ret._coldata.add(tmp.clone());
+ ret._coldata = ArrayUtils.addAll(_coldata, that._coldata);
+ for( int i=0; i<ret._coldata.length; i++ )
+ ret._coldata[i] = ret._coldata[i].clone();
}
else //ROW APPEND
{
@@ -843,12 +862,13 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
if( ret == null )
ret = new FrameBlock();
ret._numRows = _numRows;
- ret._schema = new ArrayList<ValueType>(_schema);
- ret._colnames = new ArrayList<String>(_colnames);
+ ret._schema = _schema.clone();
+ ret._colnames = _colnames.clone();
//concatenate data (deep copy first, append second)
- for( Array tmp : _coldata )
- ret._coldata.add(tmp.clone());
+ ret._coldata = new Array[_coldata.length];
+ for( int j=0; j<_coldata.length; j++ )
+ ret._coldata[j] = _coldata[j].clone();
Iterator<Object[]> iter = that.getObjectRowIterator();
while( iter.hasNext() )
ret.appendRow(iter.next());
@@ -882,13 +902,13 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
//copy values
for( int j=cl; j<=cu; j++ ) {
//special case: column memcopy
- if( _schema.get(j).equals(src._schema.get(j-cl)) )
- _coldata.get(j).set(rl, ru, src._coldata.get(j-cl));
+ if( _schema[j].equals(src._schema[j-cl]) )
+ _coldata[j].set(rl, ru, src._coldata[j-cl]);
//general case w/ schema transformation
else
for( int i=rl; i<=ru; i++ ) {
String tmp = src.get(i-rl, j-cl)!=null ? src.get(i-rl, j-cl).toString() : null;
- set(i, j, UtilFunctions.stringToObject(_schema.get(j), tmp));
+ set(i, j, UtilFunctions.stringToObject(_schema[j], tmp));
}
}
}
@@ -912,7 +932,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
//construct recode map
HashMap<String,Long> map = new HashMap<String,Long>();
- Array ldata = _coldata.get(col);
+ Array ldata = _coldata[col];
for( int i=0; i<getNumRows(); i++ ) {
Object val = ldata.get(i);
if( val != null ) {
@@ -955,21 +975,21 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
//meta data copy if necessary
for( int j=0; j<getNumColumns(); j++ )
if( !that.isColumnMetadataDefault(j) ) {
- _colmeta.get(j).setNumDistinct(that._colmeta.get(j).getNumDistinct());
- _colmeta.get(j).setMvValue(that._colmeta.get(j).getMvValue());
+ _colmeta[j].setNumDistinct(that._colmeta[j].getNumDistinct());
+ _colmeta[j].setMvValue(that._colmeta[j].getMvValue());
}
//core frame block merge through cell copy
//with column-wide access pattern
for( int j=0; j<getNumColumns(); j++ ) {
//special case: copy non-zeros of column
- if( _schema.get(j).equals(that._schema.get(j)) )
- _coldata.get(j).setNz(0, _numRows-1, that._coldata.get(j));
+ if( _schema[j].equals(that._schema[j]) )
+ _coldata[j].setNz(0, _numRows-1, that._coldata[j]);
//general case w/ schema transformation
else {
for( int i=0; i<_numRows; i++ ) {
Object obj = UtilFunctions.objectToObject(
- getSchema().get(j), that.get(i,j), true);
+ _schema[j], that.get(i,j), true);
if (obj != null) //merge non-zeros
set(i, j,obj);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java b/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java
index f2c7ecb..bd457ca 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java
@@ -21,8 +21,7 @@
package org.apache.sysml.runtime.matrix.data;
import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
+import java.util.Arrays;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
@@ -569,7 +568,7 @@ public class OperationsOnMatrixValues
//allocate space for the output value
for(long r=resultBlockIndexTop; r<=resultBlockIndexBottom; r++)
{
- List<ValueType> schema = UtilFunctions.getSubSchema(block.getSchema(), tmpRange.colStart, tmpRange.colEnd);
+ ValueType[] schema = Arrays.copyOfRange(block.getSchema(), (int)tmpRange.colStart, (int)tmpRange.colEnd+1);
long iResultIndex = Math.max(((r-1)*brlen - ixrange.rowStart + 1), 0);
Pair<Long,FrameBlock> out=new Pair<Long,FrameBlock>(new Long(iResultIndex+1), new FrameBlock(schema));
outlist.add(out);
@@ -631,12 +630,11 @@ public class OperationsOnMatrixValues
int lbclen = clenLeft;
- List<ValueType> schemaPartialLeft = Collections.nCopies(lhs_lcl, ValueType.STRING);
- List<ValueType> schemaRHS = UtilFunctions.getSubSchema(fb.getSchema(), rhs_lcl, rhs_lcl-lhs_lcl+lhs_lcu);
- List<ValueType> schema = new ArrayList<ValueType>(schemaPartialLeft);
- schema.addAll(schemaRHS);
- List<ValueType> schemaPartialRight = Collections.nCopies(lbclen-schema.size(), ValueType.STRING);
- schema.addAll(schemaPartialRight);
+ ValueType[] schemaPartialLeft = UtilFunctions.nCopies(lhs_lcl, ValueType.STRING);
+ ValueType[] schemaRHS = Arrays.copyOfRange(fb.getSchema(), (int)(rhs_lcl), (int)(rhs_lcl-lhs_lcl+lhs_lcu+1));
+ ValueType[] schema = UtilFunctions.copyOf(schemaPartialLeft, schemaRHS);
+ ValueType[] schemaPartialRight = UtilFunctions.nCopies(lbclen-schema.length, ValueType.STRING);
+ schema = UtilFunctions.copyOf(schema, schemaPartialRight);
FrameBlock resultBlock = new FrameBlock(schema);
int iRHSRows = (int)(leftRowIndex<=rlen/brlenLeft?brlenLeft:rlen-(rlen/brlenLeft)*brlenLeft);
resultBlock.ensureAllocatedColumns(iRHSRows);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
index e844be1..41615c5 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java
@@ -24,7 +24,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
-import java.util.List;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.OutputCollector;
@@ -54,7 +53,7 @@ public class FrameReblockBuffer
private int _brlen = -1;
private int _bclen = -1;
- private List<ValueType> _schema;
+ private ValueType[] _schema;
/**
@@ -64,7 +63,7 @@ public class FrameReblockBuffer
* @return
*
*/
- public FrameReblockBuffer( long rlen, long clen, List<ValueType> schema )
+ public FrameReblockBuffer( long rlen, long clen, ValueType[] schema )
{
this( DEFAULT_BUFFER_SIZE, rlen, clen, schema );
}
@@ -77,7 +76,7 @@ public class FrameReblockBuffer
* @return
*
*/
- public FrameReblockBuffer( int buffersize, long rlen, long clen, List<ValueType> schema )
+ public FrameReblockBuffer( int buffersize, long rlen, long clen, ValueType[] schema )
{
_bufflen = buffersize;
_count = 0;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
index ee0e56a..46090c0 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
@@ -68,7 +68,7 @@ public class BinAgent extends Encoder
super( null, clen );
}
- public BinAgent(JSONObject parsedSpec, List<String> colnames, int clen)
+ public BinAgent(JSONObject parsedSpec, String[] colnames, int clen)
throws JSONException, IOException
{
this(parsedSpec, colnames, clen, false);
@@ -81,7 +81,7 @@ public class BinAgent extends Encoder
* @throws JSONException
* @throws IOException
*/
- public BinAgent(JSONObject parsedSpec, List<String> colnames, int clen, boolean colsOnly)
+ public BinAgent(JSONObject parsedSpec, String[] colnames, int clen, boolean colsOnly)
throws JSONException, IOException
{
super( null, clen );
@@ -364,7 +364,7 @@ public class BinAgent extends Encoder
int colID = _colList[j];
for( int i=0; i<in.getNumRows(); i++ ) {
double inVal = UtilFunctions.objectToDouble(
- in.getSchema().get(colID-1), in.get(i, colID-1));
+ in.getSchema()[colID-1], in.get(i, colID-1));
int ix = Arrays.binarySearch(_binMaxs[j], inVal);
int binID = ((ix < 0) ? Math.abs(ix+1) : ix) + 1;
out.quickSetValue(i, colID-1, binID);
@@ -384,7 +384,7 @@ public class BinAgent extends Encoder
_binMaxs = new double[_colList.length][];
for( int j=0; j<_colList.length; j++ ) {
int colID = _colList[j]; //1-based
- int nbins = (int)meta.getColumnMetadata().get(colID-1).getNumDistinct();
+ int nbins = (int)meta.getColumnMetadata()[colID-1].getNumDistinct();
_binMins[j] = new double[nbins];
_binMaxs[j] = new double[nbins];
for( int i=0; i<nbins; i++ ) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
index b51d639..e0b4826 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
@@ -64,7 +64,7 @@ public class DummycodeAgent extends Encoder
super(list, clen);
}
- public DummycodeAgent(JSONObject parsedSpec, List<String> colnames, int clen) throws JSONException {
+ public DummycodeAgent(JSONObject parsedSpec, String[] colnames, int clen) throws JSONException {
super(null, clen);
if ( parsedSpec.containsKey(TfUtils.TXMETHOD_DUMMYCODE) ) {
@@ -443,7 +443,7 @@ public class DummycodeAgent extends Encoder
idx++;
}
else {
- double ptval = UtilFunctions.objectToDouble(in.getSchema().get(colID-1), in.get(i, colID-1));
+ double ptval = UtilFunctions.objectToDouble(in.getSchema()[colID-1], in.get(i, colID-1));
ret.quickSetValue(i, ncolID-1, ptval);
ncolID++;
}
@@ -465,7 +465,7 @@ public class DummycodeAgent extends Encoder
_dummycodedLength = _clen;
for( int j=0; j<_colList.length; j++ ) {
int colID = _colList[j]; //1-based
- _domainSizes[j] = (int)meta.getColumnMetadata().get(colID-1).getNumDistinct();
+ _domainSizes[j] = (int)meta.getColumnMetadata()[colID-1].getNumDistinct();
_dummycodedLength += _domainSizes[j]-1;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
index 1266ced..4ff93a5 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
@@ -98,7 +98,7 @@ public class MVImputeAgent extends Encoder
public KahanObject[] getMeans_scnomv() { return _scnomvMeanList; }
public CM_COV_Object[] getVars_scnomv() { return _scnomvVarList; }
- public MVImputeAgent(JSONObject parsedSpec, List<String> colnames, int clen)
+ public MVImputeAgent(JSONObject parsedSpec, String[] colnames, int clen)
throws JSONException
{
super(null, clen);
@@ -114,7 +114,7 @@ public class MVImputeAgent extends Encoder
_hist = new HashMap<Integer, HashMap<String,Long>>();
}
- public MVImputeAgent(JSONObject parsedSpec, String[] NAstrings, int clen)
+ public MVImputeAgent(JSONObject parsedSpec, String[] colnames, String[] NAstrings, int clen)
throws JSONException
{
super(null, clen);
@@ -947,7 +947,7 @@ public class MVImputeAgent extends Encoder
long off = _countList[j];
for( int i=0; i<in.getNumRows(); i++ )
_meanFn.execute2(_meanList[j], UtilFunctions.objectToDouble(
- in.getSchema().get(colID-1), in.get(i, colID-1)), off+i+1);
+ in.getSchema()[colID-1], in.get(i, colID-1)), off+i+1);
_replacementList[j] = String.valueOf(_meanList[j]._sum);
_countList[j] += in.getNumRows();
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
index de6d59f..982f4b9 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
@@ -21,7 +21,6 @@ package org.apache.sysml.runtime.transform;
import java.io.IOException;
import java.util.Iterator;
-import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -50,7 +49,7 @@ public class OmitAgent extends Encoder
super(list, clen);
}
- public OmitAgent(JSONObject parsedSpec, List<String> colnames, int clen)
+ public OmitAgent(JSONObject parsedSpec, String[] colnames, int clen)
throws JSONException
{
super(null, clen);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
index edfdff4..8ec2db3 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
@@ -59,7 +59,7 @@ public class RecodeAgent extends Encoder
private HashMap<Integer, HashMap<String, Long>> _rcdMaps = new HashMap<Integer, HashMap<String, Long>>();
private HashMap<Integer, HashMap<String,String>> _finalMaps = null;
- public RecodeAgent(JSONObject parsedSpec, List<String> colnames, int clen)
+ public RecodeAgent(JSONObject parsedSpec, String[] colnames, int clen)
throws JSONException
{
super(null, clen);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
index dd18b43..7a5da65 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
@@ -23,7 +23,6 @@ import java.io.EOFException;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
-import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.filecache.DistributedCache;
@@ -250,13 +249,11 @@ public class TfUtils implements Serializable{
private void createAgents(JSONObject spec, String[] naStrings)
throws IOException, JSONException
{
- List<String> colnames = Arrays.asList(_outputColumnNames);
-
- _oa = new OmitAgent(spec, colnames, _numInputCols);
- _mia = new MVImputeAgent(spec, naStrings, _numInputCols);
- _ra = new RecodeAgent(spec, colnames, _numInputCols);
- _ba = new BinAgent(spec, colnames, _numInputCols);
- _da = new DummycodeAgent(spec, colnames, _numInputCols);
+ _oa = new OmitAgent(spec, _outputColumnNames, _numInputCols);
+ _mia = new MVImputeAgent(spec, null, naStrings, _numInputCols);
+ _ra = new RecodeAgent(spec, _outputColumnNames, _numInputCols);
+ _ba = new BinAgent(spec, _outputColumnNames, _numInputCols);
+ _da = new DummycodeAgent(spec, _outputColumnNames, _numInputCols);
}
public void setupAgents(OmitAgent oa, MVImputeAgent mia, RecodeAgent ra, BinAgent ba, DummycodeAgent da) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java
index 2916742..3495cf3 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java
@@ -52,7 +52,7 @@ public class DecoderDummycode extends Decoder
if( in.quickGetValue(i, k-1) != 0 ) {
int col = _colList[j] - 1;
out.set(i, col, UtilFunctions.doubleToObject(
- out.getSchema().get(col), k-_clPos[j]+1));
+ out.getSchema()[col], k-_clPos[j]+1));
}
return out;
}
@@ -63,8 +63,8 @@ public class DecoderDummycode extends Decoder
_cuPos = new int[_colList.length]; //col upper pos
for( int j=0, off=0; j<_colList.length; j++ ) {
int colID = _colList[j];
- int ndist = (int)meta.getColumnMetadata()
- .get(colID-1).getNumDistinct();
+ int ndist = (int)meta.getColumnMetadata()[colID-1]
+ .getNumDistinct();
_clPos[j] = off + colID;
_cuPos[j] = _clPos[j] + ndist;
off += ndist - 1;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
index f276015..facfff8 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
@@ -46,7 +46,7 @@ public class DecoderFactory
* @throws DMLRuntimeException
*/
@SuppressWarnings("unchecked")
- public static Decoder createDecoder(String spec, List<String> colnames, List<ValueType> schema, FrameBlock meta)
+ public static Decoder createDecoder(String spec, String[] colnames, List<ValueType> schema, FrameBlock meta)
throws DMLRuntimeException
{
Decoder decoder = null;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java
index d2bf7fa..1ee0568 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java
@@ -70,8 +70,8 @@ public class DecoderPassThrough extends Decoder
ix1 ++;
}
else { //_colList[ix1] > _dcCols[ix2]
- off += (int)meta.getColumnMetadata()
- .get(_dcCols[ix2]-1).getNumDistinct() - 1;
+ off += (int)meta.getColumnMetadata()[_dcCols[ix2]-1]
+ .getNumDistinct() - 1;
ix2 ++;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
index 5484ded..42a0da9 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
@@ -54,7 +54,7 @@ public class DecoderRecode extends Decoder
for( int j=0; j<_colList.length; j++ ) {
int colID = _colList[j];
double val = UtilFunctions.objectToDouble(
- out.getSchema().get(colID-1), out.get(i, colID-1));
+ out.getSchema()[colID-1], out.get(i, colID-1));
long key = UtilFunctions.toLong(val);
out.set(i, colID-1, _rcMaps[j].get(key));
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
index 8adea7b..b71f563 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
@@ -21,7 +21,6 @@ package org.apache.sysml.runtime.transform.encode;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
import org.apache.commons.collections.CollectionUtils;
@@ -48,8 +47,8 @@ public class EncoderFactory
* @return
* @throws DMLRuntimeException
*/
- public static Encoder createEncoder(String spec, List<String> colnames, int clen, FrameBlock meta) throws DMLRuntimeException {
- return createEncoder(spec, colnames, Collections.nCopies(clen, ValueType.STRING), meta);
+ public static Encoder createEncoder(String spec, String[] colnames, int clen, FrameBlock meta) throws DMLRuntimeException {
+ return createEncoder(spec, colnames, UtilFunctions.nCopies(clen, ValueType.STRING), meta);
}
/**
@@ -61,8 +60,8 @@ public class EncoderFactory
* @return
* @throws DMLRuntimeException
*/
- public static Encoder createEncoder(String spec, List<String> colnames, List<ValueType> schema, int clen, FrameBlock meta) throws DMLRuntimeException {
- List<ValueType> lschema = (schema==null) ? Collections.nCopies(clen, ValueType.STRING) : schema;
+ public static Encoder createEncoder(String spec, String[] colnames, ValueType[] schema, int clen, FrameBlock meta) throws DMLRuntimeException {
+ ValueType[] lschema = (schema==null) ? UtilFunctions.nCopies(clen, ValueType.STRING) : schema;
return createEncoder(spec, colnames, lschema, meta);
}
@@ -75,11 +74,11 @@ public class EncoderFactory
* @throws DMLRuntimeException
*/
@SuppressWarnings("unchecked")
- public static Encoder createEncoder(String spec, List<String> colnames, List<ValueType> schema, FrameBlock meta)
+ public static Encoder createEncoder(String spec, String[] colnames, ValueType[] schema, FrameBlock meta)
throws DMLRuntimeException
{
Encoder encoder = null;
- int clen = schema.size();
+ int clen = schema.length;
try {
//parse transform specification
@@ -111,13 +110,13 @@ public class EncoderFactory
lencoders.add(new EncoderPassThrough(
ArrayUtils.toPrimitive(ptIDs.toArray(new Integer[0])), clen));
if( !dcIDs.isEmpty() )
- lencoders.add(new DummycodeAgent(jSpec, colnames, schema.size()));
+ lencoders.add(new DummycodeAgent(jSpec, colnames, schema.length));
if( !binIDs.isEmpty() )
- lencoders.add(new BinAgent(jSpec, colnames, schema.size(), true));
+ lencoders.add(new BinAgent(jSpec, colnames, schema.length, true));
if( !oIDs.isEmpty() )
- lencoders.add(new OmitAgent(jSpec, colnames, schema.size()));
+ lencoders.add(new OmitAgent(jSpec, colnames, schema.length));
if( !mvIDs.isEmpty() ) {
- MVImputeAgent ma = new MVImputeAgent(jSpec, colnames, schema.size());
+ MVImputeAgent ma = new MVImputeAgent(jSpec, colnames, schema.length);
ma.initRecodeIDList(rcIDs);
lencoders.add(ma);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
index ab146ce..08722fd 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
@@ -67,7 +67,7 @@ public class EncoderPassThrough extends Encoder
public MatrixBlock apply(FrameBlock in, MatrixBlock out) {
for( int j=0; j<_colList.length; j++ ) {
int col = _colList[j]-1;
- ValueType vt = in.getSchema().get(col);
+ ValueType vt = in.getSchema()[col];
for( int i=0; i<in.getNumRows(); i++ ) {
Object val = in.get(i, col);
out.quickSetValue(i, col, (val==null||(vt==ValueType.STRING
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java b/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
index de883f3..d12ff1d 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
@@ -27,11 +27,12 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
+import jodd.util.ArraysUtil;
+
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.sysml.api.jmlc.Connection;
@@ -74,7 +75,7 @@ public class TfMetaUtils
* @return
* @throws DMLRuntimeException
*/
- public static boolean containsOmitSpec(String spec, List<String> colnames) throws DMLRuntimeException {
+ public static boolean containsOmitSpec(String spec, String[] colnames) throws DMLRuntimeException {
return (TfMetaUtils.parseJsonIDList(spec, colnames, TfUtils.TXMETHOD_OMIT).length > 0);
}
@@ -86,7 +87,7 @@ public class TfMetaUtils
* @return
* @throws DMLRuntimeException
*/
- public static int[] parseJsonIDList(String spec, List<String> colnames, String group)
+ public static int[] parseJsonIDList(String spec, String[] colnames, String group)
throws DMLRuntimeException
{
try {
@@ -107,7 +108,7 @@ public class TfMetaUtils
* @return
* @throws JSONException
*/
- public static int[] parseJsonIDList(JSONObject spec, List<String> colnames, String group)
+ public static int[] parseJsonIDList(JSONObject spec, String[] colnames, String group)
throws JSONException
{
int[] colList = new int[0];
@@ -127,7 +128,7 @@ public class TfMetaUtils
colList = new int[attrs.size()];
for(int i=0; i < colList.length; i++) {
colList[i] = ids ? UtilFunctions.toInt(attrs.get(i)) :
- (colnames.indexOf(attrs.get(i)) + 1);
+ (ArraysUtil.indexOf(colnames, attrs.get(i)) + 1);
if( colList[i] <= 0 ) {
throw new RuntimeException("Specified column '" +
attrs.get(i)+"' does not exist.");
@@ -148,7 +149,7 @@ public class TfMetaUtils
* @return
* @throws JSONException
*/
- public static int[] parseJsonObjectIDList(JSONObject spec, List<String> colnames, String group)
+ public static int[] parseJsonObjectIDList(JSONObject spec, String[] colnames, String group)
throws JSONException
{
int[] colList = new int[0];
@@ -161,7 +162,7 @@ public class TfMetaUtils
for(int j=0; j<colspecs.size(); j++) {
JSONObject colspec = (JSONObject) colspecs.get(j);
colList[j] = ids ? colspec.getInt("id") :
- (colnames.indexOf(colspec.get("name")) + 1);
+ (ArrayUtils.indexOf(colnames, colspec.get("name")) + 1);
if( colList[j] <= 0 ) {
throw new RuntimeException("Specified column '" +
colspec.get(ids?"id":"name")+"' does not exist.");
@@ -190,15 +191,15 @@ public class TfMetaUtils
{
//read column names
String colnamesStr = MapReduceTool.readStringFromHDFSFile(metapath+File.separator+TfUtils.TXMTD_COLNAMES);
- List<String> colnames = Arrays.asList(IOUtilFunctions.split(colnamesStr.trim(), colDelim));
+ String[] colnames = IOUtilFunctions.split(colnamesStr.trim(), colDelim);
//read meta data (currently supported: recode, dummycode, bin, omit, impute)
//note: recode/binning and impute might be applied on the same column
HashMap<String,String> meta = new HashMap<String,String>();
HashMap<String,String> mvmeta = new HashMap<String,String>();
int rows = 0;
- for( int j=0; j<colnames.size(); j++ ) {
- String colName = colnames.get(j);
+ for( int j=0; j<colnames.length; j++ ) {
+ String colName = colnames[j];
//read recode maps for recoded or dummycoded columns
String name = metapath+File.separator+"Recode"+File.separator+colName;
if( MapReduceTool.existsFileOnHDFS(name+TfUtils.TXMTD_RCD_MAP_SUFFIX) ) {
@@ -244,15 +245,15 @@ public class TfMetaUtils
{
//read column names
String colnamesStr = IOUtilFunctions.toString(Connection.class.getResourceAsStream(metapath+"/"+TfUtils.TXMTD_COLNAMES));
- List<String> colnames = Arrays.asList(IOUtilFunctions.split(colnamesStr.trim(), colDelim));
+ String[] colnames = IOUtilFunctions.split(colnamesStr.trim(), colDelim);
//read meta data (currently supported: recode, dummycode, bin, omit)
//note: recode/binning and impute might be applied on the same column
HashMap<String,String> meta = new HashMap<String,String>();
HashMap<String,String> mvmeta = new HashMap<String,String>();
int rows = 0;
- for( int j=0; j<colnames.size(); j++ ) {
- String colName = colnames.get(j);
+ for( int j=0; j<colnames.length; j++ ) {
+ String colName = colnames[j];
//read recode maps for recoded or dummycoded columns
String name = metapath+"/"+"Recode"+"/"+colName;
String map = IOUtilFunctions.toString(Connection.class.getResourceAsStream(name+TfUtils.TXMTD_RCD_MAP_SUFFIX));
@@ -294,18 +295,18 @@ public class TfMetaUtils
* @return
* @throws IOException
*/
- private static FrameBlock convertToTransformMetaDataFrame(int rows, List<String> colnames, List<Integer> rcIDs, List<Integer> binIDs,
+ private static FrameBlock convertToTransformMetaDataFrame(int rows, String[] colnames, List<Integer> rcIDs, List<Integer> binIDs,
HashMap<String,String> meta, HashMap<String,String> mvmeta)
throws IOException
{
//create frame block w/ pure string schema
- List<ValueType> schema = Collections.nCopies(colnames.size(), ValueType.STRING);
+ ValueType[] schema = UtilFunctions.nCopies(colnames.length, ValueType.STRING);
FrameBlock ret = new FrameBlock(schema, colnames);
ret.ensureAllocatedColumns(rows);
//encode recode maps (recoding/dummycoding) into frame
for( Integer colID : rcIDs ) {
- String name = colnames.get(colID-1);
+ String name = colnames[colID-1];
String map = meta.get(name);
if( map == null )
throw new IOException("Recode map for column '"+name+"' (id="+colID+") not existing.");
@@ -324,7 +325,7 @@ public class TfMetaUtils
//encode bin maps (binning) into frame
for( Integer colID : binIDs ) {
- String name = colnames.get(colID-1);
+ String name = colnames[colID-1];
String map = meta.get(name);
if( map == null )
throw new IOException("Binning map for column '"+name+"' (id="+colID+") not existing.");
@@ -343,7 +344,7 @@ public class TfMetaUtils
//encode impute meta data into frame
for( Entry<String, String> e : mvmeta.entrySet() ) {
- int colID = colnames.indexOf(e.getKey()) + 1;
+ int colID = ArrayUtils.indexOf(colnames, e.getKey()) + 1;
String mvVal = e.getValue().split(TfUtils.TXMTD_SEP)[1];
ret.getColumnMetadata(colID-1).setMvValue(mvVal);
}
@@ -361,7 +362,7 @@ public class TfMetaUtils
* @throws IOException
*/
@SuppressWarnings("unchecked")
- private static List<Integer> parseRecodeColIDs(String spec, List<String> colnames)
+ private static List<Integer> parseRecodeColIDs(String spec, String[] colnames)
throws IOException
{
if( spec == null )
@@ -391,7 +392,7 @@ public class TfMetaUtils
* @return
* @throws IOException
*/
- public static List<Integer> parseBinningColIDs(String spec, List<String> colnames)
+ public static List<Integer> parseBinningColIDs(String spec, String[] colnames)
throws IOException
{
try {
@@ -409,7 +410,7 @@ public class TfMetaUtils
* @return
* @throws IOException
*/
- public static List<Integer> parseBinningColIDs(JSONObject jSpec, List<String> colnames)
+ public static List<Integer> parseBinningColIDs(JSONObject jSpec, String[] colnames)
throws IOException
{
try {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index 9bb27d9..381ad87 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -23,7 +23,6 @@ import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -629,10 +628,10 @@ public class DataConverter
MatrixBlock mb = new MatrixBlock(m, n, false);
mb.allocateDenseBlock();
- List<ValueType> schema = frame.getSchema();
- int dFreq = Collections.frequency(schema, ValueType.DOUBLE);
+ ValueType[] schema = frame.getSchema();
+ int dFreq = UtilFunctions.frequency(schema, ValueType.DOUBLE);
- if( dFreq == schema.size() ) {
+ if( dFreq == schema.length ) {
// special case double schema (without cell-object creation,
// cache-friendly row-column copy)
double[][] a = new double[n][];
@@ -654,7 +653,7 @@ public class DataConverter
for( int i=0; i<frame.getNumRows(); i++ )
for( int j=0; j<frame.getNumColumns(); j++ ) {
mb.appendValue(i, j, UtilFunctions.objectToDouble(
- schema.get(j), frame.get(i, j)));
+ schema[j], frame.get(i, j)));
}
}
@@ -699,7 +698,7 @@ public class DataConverter
return new FrameBlock();
//create schema and frame block
- List<ValueType> schema = Collections.nCopies(data[0].length, ValueType.STRING);
+ ValueType[] schema = UtilFunctions.nCopies(data[0].length, ValueType.STRING);
return convertToFrameBlock(data, schema);
}
@@ -709,7 +708,7 @@ public class DataConverter
* @param schema
* @return
*/
- public static FrameBlock convertToFrameBlock(String[][] data, List<ValueType> schema) {
+ public static FrameBlock convertToFrameBlock(String[][] data, ValueType[] schema) {
//check for empty frame block
if( data == null || data.length==0 )
return new FrameBlock();
@@ -725,7 +724,7 @@ public class DataConverter
* @param colnames
* @return
*/
- public static FrameBlock convertToFrameBlock(String[][] data, List<ValueType> schema, List<String> colnames) {
+ public static FrameBlock convertToFrameBlock(String[][] data, ValueType[] schema, String[] colnames) {
//check for empty frame block
if( data == null || data.length==0 )
return new FrameBlock();
@@ -753,7 +752,7 @@ public class DataConverter
*/
public static FrameBlock convertToFrameBlock(MatrixBlock mb, ValueType vt) {
//create schema and frame block
- List<ValueType> schema = Collections.nCopies(mb.getNumColumns(), vt);
+ ValueType[] schema = UtilFunctions.nCopies(mb.getNumColumns(), vt);
return convertToFrameBlock(mb, schema);
}
@@ -763,7 +762,7 @@ public class DataConverter
* @param schema
* @return
*/
- public static FrameBlock convertToFrameBlock(MatrixBlock mb, List<ValueType> schema)
+ public static FrameBlock convertToFrameBlock(MatrixBlock mb, ValueType[] schema)
{
FrameBlock frame = new FrameBlock(schema);
Object[] row = new Object[mb.getNumColumns()];
@@ -780,7 +779,7 @@ public class DataConverter
double[] aval = sblock.values(i);
for( int j=apos; j<apos+alen; j++ ) {
row[aix[j]] = UtilFunctions.doubleToObject(
- schema.get(aix[j]), aval[j]);
+ schema[aix[j]], aval[j]);
}
}
frame.appendRow(row);
@@ -788,9 +787,9 @@ public class DataConverter
}
else //DENSE
{
- int dFreq = Collections.frequency(schema, ValueType.DOUBLE);
+ int dFreq = UtilFunctions.frequency(schema, ValueType.DOUBLE);
- if( dFreq == schema.size() ) {
+ if( dFreq == schema.length ) {
// special case double schema (without cell-object creation,
// col pre-allocation, and cache-friendly row-column copy)
int m = mb.getNumRows();
@@ -816,7 +815,7 @@ public class DataConverter
for( int i=0; i<mb.getNumRows(); i++ ) {
for( int j=0; j<mb.getNumColumns(); j++ ) {
row[j] = UtilFunctions.doubleToObject(
- schema.get(j), mb.quickGetValue(i, j));
+ schema[j], mb.quickGetValue(i, j));
}
frame.appendRow(row);
}
@@ -1052,7 +1051,7 @@ public class DataConverter
//print column names
sb.append("#"); sb.append(separator);
for( int j=0; j<colLength; j++ ) {
- sb.append(fb.getColumnNames().get(j));
+ sb.append(fb.getColumnNames()[j]);
if( j != colLength-1 )
sb.append(separator);
}
@@ -1061,7 +1060,7 @@ public class DataConverter
//print schema
sb.append("#"); sb.append(separator);
for( int j=0; j<colLength; j++ ) {
- sb.append(fb.getSchema().get(j));
+ sb.append(fb.getSchema()[j]);
if( j != colLength-1 )
sb.append(separator);
}
@@ -1078,7 +1077,7 @@ public class DataConverter
Object[] row = iter.next();
for( int j=0; j<colLength; j++ ) {
if( row[j]!=null ) {
- if( fb.getSchema().get(j) == ValueType.DOUBLE )
+ if( fb.getSchema()[j] == ValueType.DOUBLE )
sb.append(df.format(row[j]));
else
sb.append(row[j]);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
index c99b79b..de6b8e8 100644
--- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
+++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
@@ -25,7 +25,6 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
-import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -432,7 +431,7 @@ public class MapReduceTool
writeMetaDataFile(mtdfile, vt, null, DataType.MATRIX, mc, outinfo);
}
- public static void writeMetaDataFile(String mtdfile, ValueType vt, List<ValueType> schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo)
+ public static void writeMetaDataFile(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo)
throws IOException {
writeMetaDataFile(mtdfile, vt, schema, dt, mc, outinfo, null);
}
@@ -442,7 +441,7 @@ public class MapReduceTool
writeMetaDataFile(mtdfile, vt, null, DataType.MATRIX, mc, outinfo, formatProperties);
}
- public static void writeMetaDataFile(String mtdfile, ValueType vt, List<ValueType> schema, DataType dt, MatrixCharacteristics mc,
+ public static void writeMetaDataFile(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc,
OutputInfo outinfo, FileFormatProperties formatProperties)
throws IOException
{
@@ -490,7 +489,7 @@ public class MapReduceTool
* @throws JSONException
* @throws DMLRuntimeException
*/
- public static String metaDataToString(String mtdfile, ValueType vt, List<ValueType> schema, DataType dt, MatrixCharacteristics mc,
+ public static String metaDataToString(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc,
OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException
{
OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file
@@ -502,11 +501,11 @@ public class MapReduceTool
}
else {
StringBuffer schemaSB = new StringBuffer();
- for(int i=0; i < schema.size(); i++) {
- if( schema.get(i) == ValueType.UNKNOWN )
+ for(int i=0; i < schema.length; i++) {
+ if( schema[i] == ValueType.UNKNOWN )
schemaSB.append("*");
else
- schemaSB.append(schema.get(i).toString());
+ schemaSB.append(schema[i].toString());
schemaSB.append(DataExpression.DEFAULT_DELIM_DELIMITER);
}
mtd.put(DataExpression.SCHEMAPARAM, schemaSB.toString());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
index e5a792a..89472e1 100644
--- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import org.apache.commons.lang.ArrayUtils;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.instructions.InstructionUtils;
@@ -604,25 +605,6 @@ public class UtilFunctions
}
/**
- * Returns the subset of the schema
- *
- * @param srcSchema
- * @param lStart
- * @param lEnd
- *
- * @return
- */
- public static List<ValueType> getSubSchema(List<ValueType> srcSchema, long lStart, long lEnd)
- {
- ValueType [] schema = new ValueType[(int) (lEnd-lStart+1)];
- for(int i = 0; i < schema.length; i++)
- schema[i] = srcSchema.get((int) (lStart+i));
-
- return Arrays.asList(schema);
- }
-
-
- /**
* This function will return datatype, if its Matrix or Frame
*
* @param str
@@ -660,4 +642,39 @@ public class UtilFunctions
return (!sobj.equals("0") && !sobj.equals("0.0"));
}
}
+
+ /**
+ *
+ * @param n
+ * @param vt
+ * @return
+ */
+ public static ValueType[] nCopies(int n, ValueType vt) {
+ ValueType[] ret = new ValueType[n];
+ Arrays.fill(ret, vt);
+ return ret;
+ }
+
+ /**
+ *
+ * @param schema
+ * @param vt
+ * @return
+ */
+ public static int frequency(ValueType[] schema, ValueType vt) {
+ int count = 0;
+ for( ValueType tmp : schema )
+ count += tmp.equals(vt) ? 1 : 0;
+ return count;
+ }
+
+ /**
+ *
+ * @param schema1
+ * @param schema2
+ * @return
+ */
+ public static ValueType[] copyOf(ValueType[] schema1, ValueType[] schema2) {
+ return (ValueType[]) ArrayUtils.addAll(schema1, schema2);
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
index 4273113..e7bb720 100644
--- a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
@@ -31,7 +31,6 @@ import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
-import java.util.List;
import org.apache.sysml.lops.Lop;
import org.apache.commons.io.FileUtils;
@@ -1700,7 +1699,7 @@ public abstract class AutomatedTestBase
* @throws IOException
* @throws DMLRuntimeException
*/
- protected double[][] writeInputFrame(String name, double[][] data, boolean bIncludeR, List<ValueType> schema, OutputInfo oi)
+ protected double[][] writeInputFrame(String name, double[][] data, boolean bIncludeR, ValueType[] schema, OutputInfo oi)
throws DMLRuntimeException, IOException
{
String completePath = baseDirectory + INPUT_DIR + name;
@@ -1725,14 +1724,14 @@ public abstract class AutomatedTestBase
return data;
}
- protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, List<ValueType> schema, OutputInfo oi)
+ protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, ValueType[] schema, OutputInfo oi)
throws DMLRuntimeException, IOException
{
MatrixCharacteristics mc = new MatrixCharacteristics(data.length, data[0].length, OptimizerUtils.DEFAULT_BLOCKSIZE, data[0].length, -1);
return writeInputFrameWithMTD(name, data, bIncludeR, mc, schema, oi);
}
- protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, MatrixCharacteristics mc, List<ValueType> schema, OutputInfo oi)
+ protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, MatrixCharacteristics mc, ValueType[] schema, OutputInfo oi)
throws DMLRuntimeException, IOException
{
writeInputFrame(name, data, bIncludeR, schema, oi);
@@ -1766,7 +1765,7 @@ public abstract class AutomatedTestBase
* @throws IOException
* @throws DMLRuntimeException
*/
- protected double[][] writeInputFrame(String name, double[][] data, List<ValueType> schema, OutputInfo oi)
+ protected double[][] writeInputFrame(String name, double[][] data, ValueType[] schema, OutputInfo oi)
throws DMLRuntimeException, IOException
{
return writeInputFrame(name, data, false, schema, oi);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java
index 0d3b932..ccb91f4 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java
@@ -20,7 +20,6 @@
package org.apache.sysml.test.integration.functions.frame;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@@ -158,11 +157,11 @@ public class FrameAppendDistTest extends AutomatedTestBase
inputDir() + " " + expectedDir() + " " + (rbind? "rbind": "cbind");
//initialize the frame data.
- List<ValueType> lschemaA = Arrays.asList(genMixSchema(cols1));
+ ValueType[] lschemaA = genMixSchema(cols1);
double[][] A = getRandomMatrix(rows1, cols1, min, max, sparsity, 1111 /*\\System.currentTimeMillis()*/);
writeInputFrameWithMTD("A", A, true, lschemaA, OutputInfo.BinaryBlockOutputInfo);
- List<ValueType> lschemaB = Arrays.asList(genMixSchema(cols2));
+ ValueType[] lschemaB = genMixSchema(cols2);
double[][] B = getRandomMatrix(rows2, cols2, min, max, sparsity, 2345 /*\\System.currentTimeMillis()*/);
writeInputFrameWithMTD("B", B, true, lschemaB, OutputInfo.BinaryBlockOutputInfo);
@@ -171,15 +170,14 @@ public class FrameAppendDistTest extends AutomatedTestBase
runTest(true, exceptionExpected, null, expectedNumberOfJobs);
runRScript(true);
- List<ValueType> lschemaAB = new ArrayList<ValueType>(lschemaA);
- lschemaAB.addAll(lschemaB);
+ ValueType[] lschemaAB = UtilFunctions.copyOf(lschemaA, lschemaB);
for(String file: config.getOutputFiles())
{
FrameBlock frameBlock = readDMLFrameFromHDFS(file, InputInfo.BinaryBlockInputInfo);
MatrixCharacteristics md = new MatrixCharacteristics(frameBlock.getNumRows(), frameBlock.getNumColumns(), -1, -1);
FrameBlock frameRBlock = readRFrameFromHDFS(file+".csv", InputInfo.CSVInputInfo, md);
- verifyFrameData(frameBlock, frameRBlock, (ValueType[]) lschemaAB.toArray(new ValueType[0]));
+ verifyFrameData(frameBlock, frameRBlock, (ValueType[]) lschemaAB);
System.out.println("File processed is " + file);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java
index 555cf55..46d07b5 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java
@@ -19,9 +19,6 @@
package org.apache.sysml.test.integration.functions.frame;
-import java.util.Arrays;
-import java.util.List;
-
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.instructions.cp.AppendCPInstruction.AppendType;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
@@ -90,24 +87,22 @@ public class FrameAppendTest extends AutomatedTestBase
double[][] B = getRandomMatrix(rows, schema2.length, -10, 10, 0.9, 129);
//init data frame 1
- List<ValueType> lschema1 = Arrays.asList(schema1);
- FrameBlock frame1 = new FrameBlock(lschema1);
- Object[] row1 = new Object[lschema1.size()];
+ FrameBlock frame1 = new FrameBlock(schema1);
+ Object[] row1 = new Object[schema1.length];
for( int i=0; i<rows; i++ ) {
- for( int j=0; j<lschema1.size(); j++ )
- A[i][j] = UtilFunctions.objectToDouble(lschema1.get(j),
- row1[j] = UtilFunctions.doubleToObject(lschema1.get(j), A[i][j]));
+ for( int j=0; j<schema1.length; j++ )
+ A[i][j] = UtilFunctions.objectToDouble(schema1[j],
+ row1[j] = UtilFunctions.doubleToObject(schema1[j], A[i][j]));
frame1.appendRow(row1);
}
//init data frame 2
- List<ValueType> lschema2 = Arrays.asList(schema2);
- FrameBlock frame2 = new FrameBlock(lschema2);
- Object[] row2 = new Object[lschema2.size()];
+ FrameBlock frame2 = new FrameBlock(schema2);
+ Object[] row2 = new Object[schema2.length];
for( int i=0; i<rows; i++ ) {
- for( int j=0; j<lschema2.size(); j++ )
- B[i][j] = UtilFunctions.objectToDouble(lschema2.get(j),
- row2[j] = UtilFunctions.doubleToObject(lschema2.get(j), B[i][j]));
+ for( int j=0; j<schema2.length; j++ )
+ B[i][j] = UtilFunctions.objectToDouble(schema2[j],
+ row2[j] = UtilFunctions.doubleToObject(schema2[j], B[i][j]));
frame2.appendRow(row2);
}
@@ -125,10 +120,10 @@ public class FrameAppendTest extends AutomatedTestBase
Assert.fail("Wrong number of rows: "+frame3.getNumRows()+", expected: "+mbC.getNumRows());
//check correct values
- List<ValueType> lschema = frame3.getSchema();
+ ValueType[] lschema = frame3.getSchema();
for( int i=0; i<rows; i++ )
- for( int j=0; j<lschema.size(); j++ ) {
- double tmp = UtilFunctions.objectToDouble(lschema.get(j), frame3.get(i, j));
+ for( int j=0; j<lschema.length; j++ ) {
+ double tmp = UtilFunctions.objectToDouble(lschema[j], frame3.get(i, j));
if( tmp != mbC.quickGetValue(i, j) )
Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+mbC.quickGetValue(i, j));
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java
index 5fe14dd..c7a5557 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java
@@ -19,9 +19,6 @@
package org.apache.sysml.test.integration.functions.frame;
-import java.util.Arrays;
-import java.util.List;
-
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -102,12 +99,11 @@ public class FrameCastingTest extends AutomatedTestBase
if( ctype == CastType.F2M )
{
//construct input schema
- List<ValueType> lschema1 = Arrays.asList(schema);
- FrameBlock frame1 = new FrameBlock(lschema1);
- Object[] row1 = new Object[lschema1.size()];
+ FrameBlock frame1 = new FrameBlock(schema);
+ Object[] row1 = new Object[schema.length];
for( int i=0; i<rows; i++ ) {
- for( int j=0; j<lschema1.size(); j++ )
- row1[j] = UtilFunctions.doubleToObject(lschema1.get(j), A[i][j]);
+ for( int j=0; j<schema.length; j++ )
+ row1[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]);
frame1.appendRow(row1);
}
@@ -122,7 +118,7 @@ public class FrameCastingTest extends AutomatedTestBase
else if( ctype == CastType.M2F_S )
{
MatrixBlock mb = DataConverter.convertToMatrixBlock(A);
- frame = DataConverter.convertToFrameBlock(mb, Arrays.asList(schema));
+ frame = DataConverter.convertToFrameBlock(mb, schema);
}
//check basic meta data
@@ -130,10 +126,10 @@ public class FrameCastingTest extends AutomatedTestBase
Assert.fail("Wrong number of rows: "+frame.getNumRows()+", expected: "+rows);
//check correct values
- List<ValueType> lschema = frame.getSchema();
+ ValueType[] lschema = frame.getSchema();
for( int i=0; i<rows; i++ )
- for( int j=0; j<lschema.size(); j++ ) {
- double tmp = UtilFunctions.objectToDouble(lschema.get(j), frame.get(i, j));
+ for( int j=0; j<lschema.length; j++ ) {
+ double tmp = UtilFunctions.objectToDouble(lschema[j], frame.get(i, j));
if( tmp != A[i][j] )
Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java
index e8c3c51..511f11c 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java
@@ -285,9 +285,8 @@ public class FrameConverterTest extends AutomatedTestBase
try
{
//initialize the frame data.
- List<ValueType> lschema = Arrays.asList(schema);
- FrameBlock frame1 = new FrameBlock(lschema);
- initFrameData(frame1, A, lschema);
+ FrameBlock frame1 = new FrameBlock(schema);
+ initFrameData(frame1, A, schema);
//write frame data to hdfs
FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
@@ -346,9 +345,8 @@ public class FrameConverterTest extends AutomatedTestBase
}
else {
//initialize the frame data.
- List<ValueType> lschema = Arrays.asList(schema);
- frame1 = new FrameBlock(lschema);
- initFrameData(frame1, A, lschema);
+ frame1 = new FrameBlock(schema);
+ initFrameData(frame1, A, schema);
//write frame data to hdfs
FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo);
@@ -393,12 +391,12 @@ public class FrameConverterTest extends AutomatedTestBase
* @param data
* @param lschema
*/
- private void initFrameData(FrameBlock frame, double[][] data, List<ValueType> lschema) {
- Object[] row1 = new Object[lschema.size()];
+ private void initFrameData(FrameBlock frame, double[][] data, ValueType[] lschema) {
+ Object[] row1 = new Object[lschema.length];
for( int i=0; i<rows; i++ ) {
- for( int j=0; j<lschema.size(); j++ )
- data[i][j] = UtilFunctions.objectToDouble(lschema.get(j),
- row1[j] = UtilFunctions.doubleToObject(lschema.get(j), data[i][j]));
+ for( int j=0; j<lschema.length; j++ )
+ data[i][j] = UtilFunctions.objectToDouble(lschema[j],
+ row1[j] = UtilFunctions.doubleToObject(lschema[j], data[i][j]));
frame.appendRow(row1);
}
}
@@ -428,10 +426,10 @@ public class FrameConverterTest extends AutomatedTestBase
private void verifyFrameMatrixData(FrameBlock frame, MatrixBlock matrix) {
for ( int i=0; i<frame.getNumRows(); i++ )
for( int j=0; j<frame.getNumColumns(); j++ ) {
- Object val1 = UtilFunctions.doubleToObject(frame.getSchema().get(j),
- UtilFunctions.objectToDouble(frame.getSchema().get(j), frame.get(i, j)));
- Object val2 = UtilFunctions.doubleToObject(frame.getSchema().get(j), matrix.getValue(i, j));
- if(( UtilFunctions.compareTo(frame.getSchema().get(j), val1, val2)) != 0)
+ Object val1 = UtilFunctions.doubleToObject(frame.getSchema()[j],
+ UtilFunctions.objectToDouble(frame.getSchema()[j], frame.get(i, j)));
+ Object val2 = UtilFunctions.doubleToObject(frame.getSchema()[j], matrix.getValue(i, j));
+ if(( UtilFunctions.compareTo(frame.getSchema()[j], val1, val2)) != 0)
Assert.fail("Frame value for cell ("+ i + "," + j + ") is " + val1 +
", is not same as matrix value " + val2);
}
@@ -455,6 +453,7 @@ public class FrameConverterTest extends AutomatedTestBase
{
SparkExecutionContext sec = (SparkExecutionContext) ExecutionContextFactory.createContext();
JavaSparkContext sc = sec.getSparkContext();
+ ValueType[] lschema = schema.toArray(new ValueType[0]);
MapReduceTool.deleteFileIfExistOnHDFS(fnameOut);
@@ -483,7 +482,7 @@ public class FrameConverterTest extends AutomatedTestBase
OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
JavaPairRDD<LongWritable,Text> rddIn = sc.hadoopFile(fnameIn, iinfo.inputFormatClass, iinfo.inputKeyClass, iinfo.inputValueClass);
JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils
- .textCellToBinaryBlock(sc, rddIn, mc, schema)
+ .textCellToBinaryBlock(sc, rddIn, mc, lschema)
.mapToPair(new LongFrameToLongWritableFrameFunction());
rddOut.saveAsHadoopFile(fnameOut, LongWritable.class, FrameBlock.class, oinfo.outputFormatClass);
break;
@@ -519,8 +518,8 @@ public class FrameConverterTest extends AutomatedTestBase
//Create DataFrame
SQLContext sqlContext = new SQLContext(sc);
- StructType dfSchema = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(schema, false);
- JavaRDD<Row> rowRDD = FrameRDDConverterUtils.csvToRowRDD(sc, fnameIn, separator, schema);
+ StructType dfSchema = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(lschema, false);
+ JavaRDD<Row> rowRDD = FrameRDDConverterUtils.csvToRowRDD(sc, fnameIn, separator, lschema);
DataFrame df = sqlContext.createDataFrame(rowRDD, dfSchema);
JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils
@@ -535,7 +534,7 @@ public class FrameConverterTest extends AutomatedTestBase
JavaPairRDD<Long, FrameBlock> rddIn = sc
.hadoopFile(fnameIn, iinfo.inputFormatClass, LongWritable.class, FrameBlock.class)
.mapToPair(new LongWritableFrameToLongFrameFunction());
- DataFrame df = FrameRDDConverterUtils.binaryBlockToDataFrame(new SQLContext(sc), rddIn, mc, schema);
+ DataFrame df = FrameRDDConverterUtils.binaryBlockToDataFrame(new SQLContext(sc), rddIn, mc, lschema);
//Convert back DataFrame to binary block for comparison using original binary to converted DF and back to binary
JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java
index e713a86..84bd36d 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java
@@ -19,9 +19,6 @@
package org.apache.sysml.test.integration.functions.frame;
-import java.util.Arrays;
-import java.util.List;
-
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.instructions.cp.AppendCPInstruction.AppendType;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
@@ -89,14 +86,12 @@ public class FrameCopyTest extends AutomatedTestBase
//Initialize the frame data.
//init data frame 1
- List<ValueType> lschema1 = Arrays.asList(schema1);
- FrameBlock frame1 = new FrameBlock(lschema1);
- initFrameData(frame1, A, lschema1);
+ FrameBlock frame1 = new FrameBlock(schema1);
+ initFrameData(frame1, A, schema1);
//init data frame 2
- List<ValueType> lschema2 = Arrays.asList(schema2);
- FrameBlock frame2 = new FrameBlock(lschema2);
- initFrameData(frame2, B, lschema2);
+ FrameBlock frame2 = new FrameBlock(schema2);
+ initFrameData(frame2, B, schema2);
//copy from one frame to another.
FrameBlock frame1Backup = new FrameBlock(frame1.getSchema(), frame1.getColumnNames());
@@ -125,44 +120,44 @@ public class FrameCopyTest extends AutomatedTestBase
}
}
- void initFrameData(FrameBlock frame, double[][] data, List<ValueType> lschema)
+ void initFrameData(FrameBlock frame, double[][] data, ValueType[] lschema)
{
- Object[] row1 = new Object[lschema.size()];
+ Object[] row1 = new Object[lschema.length];
for( int i=0; i<rows; i++ ) {
- for( int j=0; j<lschema.size(); j++ )
- data[i][j] = UtilFunctions.objectToDouble(lschema.get(j),
- row1[j] = UtilFunctions.doubleToObject(lschema.get(j), data[i][j]));
+ for( int j=0; j<lschema.length; j++ )
+ data[i][j] = UtilFunctions.objectToDouble(lschema[j],
+ row1[j] = UtilFunctions.doubleToObject(lschema[j], data[i][j]));
frame.appendRow(row1);
}
}
void updateFrameWithDummyData(FrameBlock frame, int updateRow)
{
- List<ValueType>lschema = frame.getSchema();
- for( int j=0; j<lschema.size(); j++ ) {
- switch( lschema.get(j) ) {
+ ValueType[] lschema = frame.getSchema();
+ for( int j=0; j<lschema.length; j++ ) {
+ switch( lschema[j] ) {
case STRING: frame.set(updateRow, j, "String:"+ frame.get(updateRow, j)); break;
case BOOLEAN: frame.set(updateRow, j, ((Boolean)frame.get(updateRow, j))?(new Boolean(false)):(new Boolean(true))); break;
case INT: frame.set(updateRow, j, (Long)frame.get(updateRow, j) * 2 + 5); break;
case DOUBLE: frame.set(updateRow, j, (Double)frame.get(updateRow, j) * 2 + 7); break;
- default: throw new RuntimeException("Unsupported value type: "+lschema.get(j));
+ default: throw new RuntimeException("Unsupported value type: "+lschema[j]);
}
}
}
void verifyFrameData(FrameBlock frame1, FrameBlock frame2, int updateRow, boolean bEqual)
{
- List<ValueType>lschema = frame1.getSchema();
- for( int j=0; j<lschema.size(); j++ ) {
+ ValueType[ ]lschema = frame1.getSchema();
+ for( int j=0; j<lschema.length; j++ ) {
if(!bEqual)
{
- if( UtilFunctions.compareTo(lschema.get(j), frame1.get(updateRow, j), frame2.get(updateRow, j)) == 0)
+ if( UtilFunctions.compareTo(lschema[j], frame1.get(updateRow, j), frame2.get(updateRow, j)) == 0)
Assert.fail("Updated value for cell ("+ updateRow + "," + j + ") is " + frame1.get(updateRow, j) +
", same as original value "+frame2.get(updateRow, j));
}
else
{
- if( UtilFunctions.compareTo(lschema.get(j), frame1.get(updateRow, j), frame2.get(updateRow, j)) != 0)
+ if( UtilFunctions.compareTo(lschema[j], frame1.get(updateRow, j), frame2.get(updateRow, j)) != 0)
Assert.fail("Updated value for cell ("+ updateRow + "," + j + ") is " + frame1.get(updateRow, j) +
", not same as original value "+frame2.get(updateRow, j));
}
@@ -171,10 +166,10 @@ public class FrameCopyTest extends AutomatedTestBase
void verifyFrameData(FrameBlock frame1, FrameBlock frame2)
{
- List<ValueType> lschema = frame1.getSchema();
+ ValueType[] lschema = frame1.getSchema();
for ( int i=0; i<frame1.getNumRows(); i++ )
- for( int j=0; j<lschema.size(); j++ ) {
- if( UtilFunctions.compareTo(lschema.get(j), frame1.get(i, j), frame2.get(i, j)) != 0)
+ for( int j=0; j<lschema.length; j++ ) {
+ if( UtilFunctions.compareTo(lschema[j], frame1.get(i, j), frame2.get(i, j)) != 0)
Assert.fail("Target value for cell ("+ i + "," + j + ") is " + frame1.get(i, j) +
", is not same as original value " + frame2.get(i, j));
}