You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/06/06 13:24:05 UTC

[systemds] branch master updated: [SYSTEMDS-2565] Fix empty frame serialization (binary layout)

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 843f7cd  [SYSTEMDS-2565] Fix empty frame serialization (binary layout)
843f7cd is described below

commit 843f7cdbc1952b16af67a7ae9421baae2193c9ef
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sun Jun 6 15:22:40 2021 +0200

    [SYSTEMDS-2565] Fix empty frame serialization (binary layout)
    
    This patch fixes special cases of serializing (on broadcast, shuffle,
    eviction, and write) empty frame blocks whose column arrays are not
    allocated. We retain the original layout (for backwards compatibility)
    but store the column value type (0...128) as signed bytes, where negative
    values (e.g., -2) indicate both the value type (2, i.e., FP64) as well as
    the non-allocated column array.
---
 .../sysds/runtime/matrix/data/FrameBlock.java      | 43 +++++++-----
 .../component/frame/FrameSerializationTest.java    | 81 +++++++++++++---------
 2 files changed, 75 insertions(+), 49 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
index bc4bfc3..0f77d53 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
@@ -740,14 +740,18 @@ public class FrameBlock implements CacheBlock, Externalizable  {
 		out.writeBoolean(isDefaultMeta);
 		//write columns (value type, data)
 		for( int j=0; j<getNumColumns(); j++ ) {
-			out.writeByte(_schema[j].ordinal());
+			byte type = (byte)_schema[j].ordinal();
+			if( _coldata == null || _coldata[j] == null )
+				type *= -1; //negative to indicate non-existence
+			out.writeByte(type);
 			if( !isDefaultMeta ) {
 				out.writeUTF(getColumnName(j));
 				out.writeLong(_colmeta[j].getNumDistinct());
 				out.writeUTF( (_colmeta[j].getMvValue()!=null) ?
-						_colmeta[j].getMvValue() : "" );
+					_colmeta[j].getMvValue() : "" );
 			}
-			_coldata[j].write(out);
+			if( type >= 0 )
+				_coldata[j].write(out);
 		}
 	}
 
@@ -759,34 +763,37 @@ public class FrameBlock implements CacheBlock, Externalizable  {
 		boolean isDefaultMeta = in.readBoolean();
 		//allocate schema/meta data arrays
 		_schema = (_schema!=null && _schema.length==numCols) ?
-				_schema : new ValueType[numCols];
+			_schema : new ValueType[numCols];
 		_colnames = (_colnames != null && _colnames.length==numCols) ?
-				_colnames : new String[numCols];
+			_colnames : new String[numCols];
 		_colmeta = (_colmeta != null && _colmeta.length==numCols) ?
-				_colmeta : new ColumnMetadata[numCols];
+			_colmeta : new ColumnMetadata[numCols];
 		_coldata = (_coldata!=null && _coldata.length==numCols) ?
-				_coldata : new Array[numCols];
+			_coldata : new Array[numCols];
 		//read columns (value type, meta, data)
 		for( int j=0; j<numCols; j++ ) {
-			ValueType vt = ValueType.values()[in.readByte()];
+			byte type = in.readByte();
+			ValueType vt = ValueType.values()[Math.abs(type)];
 			String name = isDefaultMeta ? createColName(j) : in.readUTF();
 			long ndistinct = isDefaultMeta ? 0 : in.readLong();
 			String mvvalue = isDefaultMeta ? null : in.readUTF();
 			Array arr = null;
-			switch( vt ) {
-				case STRING:  arr = new StringArray(new String[_numRows]); break;
-				case BOOLEAN: arr = new BooleanArray(new boolean[_numRows]); break;
-				case INT64:     arr = new LongArray(new long[_numRows]); break;
-				case FP64:  arr = new DoubleArray(new double[_numRows]); break;
-				case INT32: arr = new IntegerArray(new int[_numRows]); break;
-				case FP32:  arr = new FloatArray(new float[_numRows]); break;
-				default: throw new IOException("Unsupported value type: "+vt);
+			if( type > 0 ) { //non-empty column
+				switch( vt ) {
+					case STRING:  arr = new StringArray(new String[_numRows]); break;
+					case BOOLEAN: arr = new BooleanArray(new boolean[_numRows]); break;
+					case INT64:     arr = new LongArray(new long[_numRows]); break;
+					case FP64:  arr = new DoubleArray(new double[_numRows]); break;
+					case INT32: arr = new IntegerArray(new int[_numRows]); break;
+					case FP32:  arr = new FloatArray(new float[_numRows]); break;
+					default: throw new IOException("Unsupported value type: "+vt);
+				}
+				arr.readFields(in);
 			}
-			arr.readFields(in);
 			_schema[j] = vt;
 			_colnames[j] = name;
 			_colmeta[j] = new ColumnMetadata(ndistinct,
-					(mvvalue==null || mvvalue.isEmpty()) ? null : mvvalue);
+				(mvvalue==null || mvvalue.isEmpty()) ? null : mvvalue);
 			_coldata[j] = arr;
 		}
 		_msize = -1;
diff --git a/src/test/java/org/apache/sysds/test/component/frame/FrameSerializationTest.java b/src/test/java/org/apache/sysds/test/component/frame/FrameSerializationTest.java
index c958157..f2c30b5 100644
--- a/src/test/java/org/apache/sysds/test/component/frame/FrameSerializationTest.java
+++ b/src/test/java/org/apache/sysds/test/component/frame/FrameSerializationTest.java
@@ -52,49 +52,65 @@ public class FrameSerializationTest extends AutomatedTestBase
 
 	@Test
 	public void testFrameStringsWritable()  {
-		runFrameSerializeTest(schemaStrings, SerType.WRITABLE_SER);
+		runFrameSerializeTest(schemaStrings, SerType.WRITABLE_SER, false);
 	}
 	
 	@Test
 	public void testFrameMixedWritable()  {
-		runFrameSerializeTest(schemaMixed, SerType.WRITABLE_SER);
+		runFrameSerializeTest(schemaMixed, SerType.WRITABLE_SER, false);
 	}
 	
 	@Test
 	public void testFrameStringsJava()  {
-		runFrameSerializeTest(schemaStrings, SerType.JAVA_SER);
+		runFrameSerializeTest(schemaStrings, SerType.JAVA_SER, false);
 	}
 	
 	@Test
 	public void testFrameMixedJava()  {
-		runFrameSerializeTest(schemaMixed, SerType.JAVA_SER);
+		runFrameSerializeTest(schemaMixed, SerType.JAVA_SER, false);
+	}
+	
+	@Test
+	public void testEmptyFrameStringsWritable()  {
+		runFrameSerializeTest(schemaStrings, SerType.WRITABLE_SER, true);
+	}
+	
+	@Test
+	public void testEmptyFrameMixedWritable()  {
+		runFrameSerializeTest(schemaMixed, SerType.WRITABLE_SER, true);
+	}
+	
+	@Test
+	public void testEmptyFrameStringsJava()  {
+		runFrameSerializeTest(schemaStrings, SerType.JAVA_SER, true);
 	}
-
 	
-	/**
-	 * 
-	 * @param sparseM1
-	 * @param sparseM2
-	 * @param instType
-	 */
-	private void runFrameSerializeTest( ValueType[] schema, SerType stype)
+	@Test
+	public void testEmptyFrameMixedJava()  {
+		runFrameSerializeTest(schemaMixed, SerType.JAVA_SER, true);
+	}
+	
+	private void runFrameSerializeTest( ValueType[] schema, SerType stype, boolean empty)
 	{
 		try
 		{
-			//data generation
-			double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 8234); 
-			
 			//init data frame
 			FrameBlock frame = new FrameBlock(schema);
+		
+			//data generation
+			double[][] A = empty ? null :
+				getRandomMatrix(rows, schema.length, -10, 10, 0.9, 8234);
 			
 			//init data frame 
-			Object[] row = new Object[schema.length];
-			for( int i=0; i<rows; i++ ) {
-				for( int j=0; j<schema.length; j++ )
-					A[i][j] = UtilFunctions.objectToDouble(schema[j], 
-							row[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]));
-				frame.appendRow(row);
-			}			
+			if( !empty ) {
+				Object[] row = new Object[schema.length];
+				for( int i=0; i<rows; i++ ) {
+					for( int j=0; j<schema.length; j++ )
+						A[i][j] = UtilFunctions.objectToDouble(schema[j],
+								row[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]));
+					frame.appendRow(row);
+				}
+			}
 			
 			//core serialization and deserialization
 			if( stype == SerType.WRITABLE_SER ) {
@@ -122,16 +138,19 @@ public class FrameSerializationTest extends AutomatedTestBase
 			}
 			
 			//check basic meta data
-			if( frame.getNumRows() != rows )
-				Assert.fail("Wrong number of rows: "+frame.getNumRows()+", expected: "+rows);
+			int numExpected = empty ? 0 : rows;
+			if( frame.getNumRows() != numExpected )
+				Assert.fail("Wrong number of rows: "+frame.getNumRows()+", expected: "+numExpected);
 		
-			//check correct values			
-			for( int i=0; i<rows; i++ ) 
-				for( int j=0; j<schema.length; j++ )	{
-					double tmp = UtilFunctions.objectToDouble(schema[j], frame.get(i, j));
-					if( tmp != A[i][j] )
-						Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]);
-				}		
+			//check correct values
+			if( !empty ) {
+				for( int i=0; i<rows; i++ ) 
+					for( int j=0; j<schema.length; j++ ) {
+						double tmp = UtilFunctions.objectToDouble(schema[j], frame.get(i, j));
+						if( tmp != A[i][j] )
+							Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]);
+					}
+			}
 		}
 		catch(Exception ex) {
 			ex.printStackTrace();