Posted to commits@arrow.apache.org by bh...@apache.org on 2019/02/27 16:08:47 UTC

[arrow] branch master updated: ARROW-4682: [JS] Fix writing empty tables

This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f5df773  ARROW-4682: [JS] Fix writing empty tables
f5df773 is described below

commit f5df77359953ed06e1dce47edffc03340e2ff3ea
Author: ptaylor <pa...@me.com>
AuthorDate: Wed Feb 27 08:08:31 2019 -0800

    ARROW-4682: [JS] Fix writing empty tables
    
    Closes https://issues.apache.org/jira/browse/ARROW-4682
    
    Author: ptaylor <pa...@me.com>
    
    Closes #3759 from trxcllnt/js/fix-write-empty-table and squashes the following commits:
    
    cae6622e <ptaylor> fix lint
    bfc9015a <ptaylor> ensure schema metadata is preserved through assign
    4898fbdf <ptaylor> assign should compare column names, not fields
    946a2718 <ptaylor> fix assigning to empty tables
    82192aed <ptaylor> add options to print horizontal table separator, schema metadata
    ea1c8db6 <ptaylor> fix writing empty tables
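
In practice the fix means an empty Table now survives an IPC round trip. A
minimal sketch of the repaired behavior (the 'apache-arrow' import path is an
assumption; Table.empty(), serialize(), and Table.from() are the same public
calls the new tests below exercise):

    import { Table } from 'apache-arrow';

    // Before this patch, a Table with no columns could not be written and
    // read back; it now round-trips, schema metadata included.
    const source = Table.empty();
    source.schema.metadata.set('foo', 'bar');
    const result = Table.from(source.serialize());   // serialize() yields a Uint8Array
    console.log(result.length, result.numCols);      // 0 0
    console.log(result.schema.metadata.get('foo'));  // 'bar'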
---
 js/src/bin/arrow2csv.ts               | 146 ++++++++++++++++++++++++++--------
 js/src/ipc/writer.ts                  |  35 +++++---
 js/src/schema.ts                      |   6 +-
 js/src/table.ts                       |   8 +-
 js/src/util/recordbatch.ts            |  16 ++--
 js/src/visitor/vectorassembler.ts     |   5 +-
 js/test/generate-test-data.ts         |   2 +-
 js/test/jest-extensions.ts            |   1 +
 js/test/unit/table/serialize-tests.ts |  59 +++++++++++++-
 9 files changed, 217 insertions(+), 61 deletions(-)

diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
index 885f350..b86f852 100644
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -23,6 +23,7 @@ import * as fs from 'fs';
 import * as stream from 'stream';
 import { valueToString } from '../util/pretty';
 import { RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node';
+import { Schema } from '../schema';
 
 const padLeft = require('pad-left');
 const bignumJSONParse = require('json-bignum').parse;
@@ -30,7 +31,16 @@ const pipeline = require('util').promisify(stream.pipeline);
 const argv = require(`command-line-args`)(cliOpts(), { partial: true });
 const files = argv.help ? [] : [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean);
 
-const state = { ...argv, closed: false, hasRecords: false };
+const state = { ...argv, closed: false, maxColWidths: [10] };
+
+type ToStringState = {
+    hr: string;
+    sep: string;
+    schema: any;
+    closed: boolean;
+    metadata: boolean;
+    maxColWidths: number[];
+};
 
 (async () => {
 
@@ -40,12 +50,14 @@ const state = { ...argv, closed: false, hasRecords: false };
     ].filter(Boolean) as (() => NodeJS.ReadableStream)[];
 
     let reader: RecordBatchReader | null;
+    let hasReaders = false;
 
     for (const source of sources) {
         if (state.closed) { break; }
         for await (reader of recordBatchReaders(source)) {
+            hasReaders = true;
             const source = reader.toNodeStream();
-            const xform = batchesToString(state);
+            const xform = batchesToString(state, reader.schema);
             const sink = new stream.PassThrough();
             sink.pipe(process.stdout, { end: false });
             await pipeline(source, xform, sink).catch(() => state.closed = true);
@@ -53,7 +65,7 @@ const state = { ...argv, closed: false, hasRecords: false };
         if (state.closed) { break; }
     }
 
-    return state.hasRecords ? 0 : print_usage();
+    return hasReaders ? 0 : print_usage();
 })()
 .then((x) => +x || 0, (err) => {
     if (err) {
@@ -93,44 +105,96 @@ async function *recordBatchReaders(createSourceStream: () => NodeJS.ReadableStre
     }
 }
 
-function batchesToString(state: { closed: boolean, schema: any, separator: string, hasRecords: boolean }) {
+function batchesToString(state: ToStringState, schema: Schema) {
+
+    let rowId = 0;
+    let batchId = -1;
+    let maxColWidths = [10];
+    const { hr, sep } = state;
 
-    let rowId = 0, maxColWidths = [15], separator = `${state.separator || ' |'} `;
+    const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(valueToString);
 
-    return new stream.Transform({ transform, encoding: 'utf8', writableObjectMode: true, readableObjectMode: false });
+    state.maxColWidths = header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length));
+
+    return new stream.Transform({
+        transform,
+        encoding: 'utf8',
+        writableObjectMode: true,
+        readableObjectMode: false,
+        final(this: stream.Transform, cb: (error?: Error | null) => void) {
+            // if there were no batches, print the header row and the schema metadata
+            if (batchId === -1) {
+                this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n\n`);
+                this.push(`${formatRow(header, maxColWidths, sep)}\n`);
+                if (state.metadata && schema.metadata.size > 0) {
+                    this.push(`metadata:\n${formatMetadata(schema.metadata)}\n`);
+                }
+            }
+            this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n\n`);
+            cb();
+        }
+    });
 
     function transform(this: stream.Transform, batch: RecordBatch, _enc: string, cb: (error?: Error, data?: any) => void) {
+
         batch = !(state.schema && state.schema.length) ? batch : batch.select(...state.schema);
-        if (batch.length <= 0 || batch.numCols <= 0 || state.closed) {
-            state.hasRecords || (state.hasRecords = false);
-            return cb(undefined, null);
-        }
 
-        state.hasRecords = true;
-        const header = ['row_id', ...batch.schema.fields.map((f) => `${f}`)].map(valueToString);
+        if (state.closed) { return cb(undefined, null); }
 
         // Pass one to convert to strings and count max column widths
-        const newMaxWidths = measureColumnWidths(rowId, batch, header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length)));
+        state.maxColWidths = measureColumnWidths(rowId, batch, header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length)));
 
-        // If any of the column widths changed, print the header again
-        if ((rowId % 350) && JSON.stringify(newMaxWidths) !== JSON.stringify(maxColWidths)) {
-            this.push(`\n${formatRow(header, newMaxWidths, separator)}`);
+        // If this is the first batch in the stream, print a top horizontal rule, the schema metadata, and (if the batch is empty) the header row
+        if (++batchId === 0) {
+            this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`);
+            if (state.metadata && batch.schema.metadata.size > 0) {
+                this.push(`metadata:\n${formatMetadata(batch.schema.metadata)}\n`);
+                this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`);
+            }
+            if (batch.length <= 0 || batch.numCols <= 0) {
+                this.push(`${formatRow(header, maxColWidths = state.maxColWidths, sep)}\n`);
+            }
         }
 
-        maxColWidths = newMaxWidths;
-
-        for (const row of batch) {
-            if (state.closed) { break; }
-            else if (!row) { continue; }
-            if (!(rowId % 350)) { this.push(`\n${formatRow(header, maxColWidths, separator)}`); }
-            this.push(formatRow([rowId++, ...row].map(valueToString), maxColWidths, separator));
+        if (batch.length > 0 && batch.numCols > 0) {
+            // If any of the column widths changed, print the header again
+            if (rowId % 350 !== 0 && JSON.stringify(state.maxColWidths) !== JSON.stringify(maxColWidths)) {
+                this.push(`${formatRow(header, state.maxColWidths, sep)}\n`);
+            }
+            maxColWidths = state.maxColWidths;
+            for (const row of batch) {
+                if (state.closed) { break; } else if (!row) { continue; }
+                if (rowId++ % 350 === 0) {
+                    this.push(`${formatRow(header, maxColWidths, sep)}\n`);
+                }
+                this.push(`${formatRow([rowId, ...row].map(valueToString), maxColWidths, sep)}\n`);
+            }
         }
         cb();
     }
 }
 
-function formatRow(row: string[] = [], maxColWidths: number[] = [], separator: string = ' |') {
-    return row.map((x, j) => padLeft(x, maxColWidths[j])).join(separator) + '\n';
+function horizontalRule(maxColWidths: number[], hr = '-', sep = ' |') {
+    return ` ${padLeft('', maxColWidths.reduce((x, y) => x + y, -2 + maxColWidths.length * sep.length), hr)}`;
+}
+
+function formatRow(row: string[] = [], maxColWidths: number[] = [], sep = ' |') {
+    return `${row.map((x, j) => padLeft(x, maxColWidths[j])).join(sep)}`;
+}
+
+function formatMetadata(metadata: Map<string, string>) {
+
+    return [...metadata].map(([key, val]) =>
+        `  ${key}: ${formatMetadataValue(val)}`
+    ).join(',  \n');
+
+    function formatMetadataValue(value: string = '') {
+        let parsed = value;
+        try {
+            parsed = JSON.stringify(JSON.parse(value), null, 2);
+        } catch (e) { parsed = value; }
+        return valueToString(parsed).split('\n').join('\n  ');
+    }
 }
 
 function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: number[] = []) {
@@ -201,8 +265,19 @@ function cliOpts() {
         },
         {
             type: String,
-            name: 'sep', optional: true, default: '|',
-            description: 'The column separator character'
+            name: 'sep', optional: true, default: ' |',
+            description: 'The column separator character (default: " |")'
+        },
+        {
+            type: String,
+            name: 'hr', optional: true, default: '-',
+            description: 'The horizontal border character (default: "-")'
+        },
+        {
+            type: Boolean,
+            name: 'metadata', alias: 'm',
+            optional: true, default: false,
+            description: 'Flag to print Schema metadata (default: false)'
         },
         {
             type: Boolean,
@@ -234,14 +309,15 @@ function print_usage() {
         {
             header: 'Example',
             content: [
-                '$ arrow2csv --schema foo baz -f simple.arrow --sep ","',
-                '                                                      ',
-                '> "row_id", "foo: Int32", "bar: Float64", "baz: Utf8"',
-                '>        0,            1,              1,        "aa"',
-                '>        1,         null,           null,        null',
-                '>        2,            3,           null,        null',
-                '>        3,            4,              4,       "bbb"',
-                '>        4,            5,              5,      "cccc"',
+                '$ arrow2csv --schema foo baz --sep "," -f simple.arrow',
+                '>--------------------------------------',
+                '>   "row_id", "foo: Int32", "baz: Utf8"',
+                '>          0,            1,        "aa"',
+                '>          1,         null,        null',
+                '>          2,            3,        null',
+                '>          3,            4,       "bbb"',
+                '>          4,            5,      "cccc"',
+                '>--------------------------------------',
             ]
         }
     ]));
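
Taken together, the arrow2csv changes add --hr and -m/--metadata options,
change the default separator to " |", and print the header row and horizontal
rules even when a stream contains no record batches. A hypothetical invocation
combining the new flags (flag names are taken from the cliOpts() additions
above; the exact output depends on the file):

    $ arrow2csv -f simple.arrow --sep "," --hr "=" -m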
diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts
index ee3f425..d6394d1 100644
--- a/js/src/ipc/writer.ts
+++ b/js/src/ipc/writer.ts
@@ -33,7 +33,7 @@ import { JSONTypeAssembler } from '../visitor/jsontypeassembler';
 import { JSONVectorAssembler } from '../visitor/jsonvectorassembler';
 import { ArrayBufferViewInput, toUint8Array } from '../util/buffer';
 import { Writable, ReadableInterop, ReadableDOMStreamOptions } from '../io/interfaces';
-import { isPromise, isAsyncIterable, isWritableDOMStream, isWritableNodeStream } from '../util/compat';
+import { isPromise, isAsyncIterable, isWritableDOMStream, isWritableNodeStream, isIterable } from '../util/compat';
 
 export class RecordBatchWriter<T extends { [key: string]: DataType } = any> extends ReadableInterop<Uint8Array> implements Writable<RecordBatch<T>> {
 
@@ -140,21 +140,34 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
         return this;
     }
 
-    public write(chunk?: Table<T> | RecordBatch<T> | null) {
-        let schema: Schema<T> | null;
+    public write(payload?: Table<T> | RecordBatch<T> | Iterable<RecordBatch<T>> | null) {
+
+        let schema: Schema<T> | null = null;
+
         if (!this._sink) {
             throw new Error(`RecordBatchWriter is closed`);
-        } else if (!chunk || !(schema = chunk.schema)) {
+        } else if (payload === null || payload === undefined) {
+            return this.finish() && undefined;
+        } else if (payload instanceof Table && !(schema = payload.schema)) {
             return this.finish() && undefined;
-        } else if (schema !== this._schema) {
+        } else if (payload instanceof RecordBatch && !(schema = payload.schema)) {
+            return this.finish() && undefined;
+        }
+
+        if (schema && !schema.compareTo(this._schema)) {
             if (this._started && this._autoDestroy) {
                 return this.close();
             }
             this.reset(this._sink, schema);
         }
-        (chunk instanceof Table)
-            ? this.writeAll(chunk.chunks)
-            : this._writeRecordBatch(chunk);
+
+        if (payload instanceof RecordBatch) {
+            this._writeRecordBatch(payload);
+        } else if (payload instanceof Table) {
+            this.writeAll(payload.chunks);
+        } else if (isIterable(payload)) {
+            this.writeAll(payload);
+        }
     }
 
     protected _writeMessage<T extends MessageHeader>(message: Message<T>, alignment = 8) {
@@ -363,7 +376,11 @@ export class RecordBatchJSONWriter<T extends { [key: string]: DataType } = any>
 
 /** @ignore */
 function writeAll<T extends { [key: string]: DataType } = any>(writer: RecordBatchWriter<T>, input: Table<T> | Iterable<RecordBatch<T>>) {
-    const chunks = (input instanceof Table) ? input.chunks : input;
+    let chunks = input as Iterable<RecordBatch<T>>;
+    if (input instanceof Table) {
+        chunks = input.chunks;
+        writer.reset(undefined, input.schema);
+    }
     for (const batch of chunks) {
         writer.write(batch);
     }
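
The reworked write() accepts a Table, a single RecordBatch, an Iterable of
RecordBatches, or null/undefined (which finishes the stream), and compares
schemas structurally with compareTo() instead of by reference. A rough sketch
of the new surface (RecordBatchStreamWriter is the concrete stream-format
writer; treat the toUint8Array() call as an assumption about how the sink's
bytes are collected):

    import { Table, RecordBatchStreamWriter } from 'apache-arrow';

    const writer = new RecordBatchStreamWriter();
    writer.write(Table.empty());  // a Table routes through writeAll(table.chunks)
    writer.write(null);           // null/undefined now finishes the stream
    // assumed helper on the writer for collecting the written bytes:
    const bytes = writer.toUint8Array(true);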
diff --git a/js/src/schema.ts b/js/src/schema.ts
index b714c65..66e5591 100644
--- a/js/src/schema.ts
+++ b/js/src/schema.ts
@@ -92,8 +92,8 @@ export class Schema<T extends { [key: string]: DataType } = any> {
         const curDictionaryFields = this.dictionaryFields;
         const metadata = mergeMaps(mergeMaps(new Map(), this.metadata), other.metadata);
         const newFields = other.fields.filter((f2) => {
-            const i = curFields.findIndex((f) => f.compareTo(f2));
-            return ~i ? (curFields[i] = curFields[i].clone({
+            const i = curFields.findIndex((f) => f.name === f2.name);
+            return ~i ? (curFields[i] = f2.clone({
                 metadata: mergeMaps(mergeMaps(new Map(), curFields[i].metadata), f2.metadata)
             })) && false : true;
         }) as Field[];
@@ -102,7 +102,7 @@ export class Schema<T extends { [key: string]: DataType } = any> {
         const newDictionaries = [...dictionaries].filter(([y]) => !curDictionaries.every(([x]) => x === y));
         const newDictionaryFields = [...dictionaryFields].map(([id, newDictFields]) => {
             return [id, [...(curDictionaryFields.get(id) || []), ...newDictFields.map((f) => {
-                const i = newFields.findIndex((f2) => f2.compareTo(f));
+                const i = newFields.findIndex((f2) => f.name === f2.name);
                 const { dictionary, indices, isOrdered, dictionaryVector } = f.type;
                 const type = new Dictionary(dictionary, indices, id, isOrdered, dictionaryVector);
                 return newFields[i] = f.clone({ type });
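
The schema.ts change makes field matching during assign use names rather than
full Field equality, so columns that share a name but differ in some other
attribute (nullability, metadata) are merged instead of duplicated. A
contrived illustration of why compareTo() was the wrong test (Field and Int32
are library classes; only the comparison itself is at issue):

    import { Field, Int32 } from 'apache-arrow';

    const a = new Field('x', new Int32(), /* nullable */ true);
    const b = new Field('x', new Int32(), /* nullable */ false);
    // a.compareTo(b) is false because the fields are not identical, so the
    // old code treated them as two distinct columns; matching on .name,
    // as this patch does, merges them.
    console.log(a.name === b.name); // true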
diff --git a/js/src/table.ts b/js/src/table.ts
index 91b368a..89f6840 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -179,9 +179,11 @@ export class Table<T extends { [key: string]: DataType } = any>
             throw new TypeError('Table must be initialized with a Schema or at least one RecordBatch');
         }
 
-        if (!chunks[0]) { chunks[0] = new RecordBatch(schema, 0, []); }
+        if (!chunks[0]) {
+            chunks[0] = new RecordBatch(schema, 0, schema.fields.map((f) => new Data(f.type, 0, 0)));
+        }
 
-        super(chunks[0].type, chunks);
+        super(new Struct<T>(schema.fields), chunks);
 
         this._schema = schema;
         this._chunks = chunks;
@@ -252,7 +254,7 @@ export class Table<T extends { [key: string]: DataType } = any>
         const fields = this._schema.fields;
         const [indices, oldToNew] = other.schema.fields.reduce((memo, f2, newIdx) => {
             const [indices, oldToNew] = memo;
-            const i = fields.findIndex((f) => f.compareTo(f2));
+            const i = fields.findIndex((f) => f.name === f2.name);
             ~i ? (oldToNew[i] = newIdx) : indices.push(newIdx);
             return memo;
         }, [[], []] as number[][]);
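
On the table.ts side, a Table built from just a Schema now receives a
zero-length RecordBatch whose children are real empty Data instances, one per
field, and its Struct type comes from the schema rather than from a
possibly-missing first chunk. A minimal sketch (the same construction the
nullSchema serialize tests below rely on):

    import { Table, Schema, Field, Int32 } from 'apache-arrow';

    const schema = new Schema([new Field('a', new Int32())]);
    const table = new Table(schema);          // no chunks supplied
    console.log(table.length, table.numCols); // 0 1, valid and serializable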
diff --git a/js/src/util/recordbatch.ts b/js/src/util/recordbatch.ts
index c165aa4..40ced20 100644
--- a/js/src/util/recordbatch.ts
+++ b/js/src/util/recordbatch.ts
@@ -65,14 +65,14 @@ export function distributeVectorsIntoRecordBatches<T extends { [key: string]: Da
 /** @ignore */
 function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]: DataType } = any>(schema: Schema<T>, columns: Data<T[keyof T]>[][]): [Schema<T>, RecordBatch<T>[]] {
 
-    let numBatches = 0;
     const fields = [...schema.fields];
-    const batches = [] as [number, Data<T[keyof T]>[]][];
+    const batchArgs = [] as [number, Data<T[keyof T]>[]][];
     const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) };
+    let sameLength = false, numBatches = 0, batchLength = 0, batchData: Data<T[keyof T]>[];
 
-    while (memo.numBatches > 0) {
+    while (memo.numBatches-- > 0) {
 
-        const [sameLength, batchLength] = columns.reduce((memo, [chunk]) => {
+        [sameLength, batchLength] = columns.reduce((memo, [chunk]) => {
             const [same, batchLength] = memo;
             const chunkLength = chunk ? chunk.length : batchLength;
             isFinite(batchLength) && same && (memo[0] = chunkLength === batchLength);
@@ -81,18 +81,18 @@ function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]:
         }, [true, Number.POSITIVE_INFINITY] as [boolean, number]);
 
         if (isFinite(batchLength) && !(sameLength && batchLength <= 0)) {
-            batches[numBatches++] = [batchLength, distributeChildData(fields, batchLength, columns, memo)];
+            batchData = distributeChildData(fields, batchLength, columns, memo);
+            batchLength > 0 && (batchArgs[numBatches++] = [batchLength, batchData]);
         }
     }
     return [
-        schema = new Schema<T>(fields),
-        batches.map((xs) => new RecordBatch(schema, ...xs))
+        schema = new Schema<T>(fields, schema.metadata),
+        batchArgs.map((xs) => new RecordBatch(schema, ...xs))
     ];
 }
 
 /** @ignore */
 function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
-    memo.numBatches -= 1;
     let data: Data<T[keyof T]>;
     let field: Field<T[keyof T]>;
     let chunks: Data<T[keyof T]>[];
diff --git a/js/src/visitor/vectorassembler.ts b/js/src/visitor/vectorassembler.ts
index 1067855..3cbcb4e 100644
--- a/js/src/visitor/vectorassembler.ts
+++ b/js/src/visitor/vectorassembler.ts
@@ -59,7 +59,10 @@ export class VectorAssembler extends Visitor {
 
     /** @nocollapse */
     public static assemble<T extends Vector | RecordBatch>(...args: (T | T[])[]) {
-        return new VectorAssembler().visitMany(selectVectorChildrenArgs(RecordBatch, args))[0];
+        const assembler = new VectorAssembler();
+        const vectorChildren = selectVectorChildrenArgs(RecordBatch, args);
+        const [assembleResult = assembler] = assembler.visitMany(vectorChildren);
+        return assembleResult;
     }
 
     private constructor() { super(); }
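
The vectorassembler.ts change relies on a default in array destructuring so
that assembling zero record batches returns the (empty) assembler itself
instead of undefined. The idiom in isolation, as plain TypeScript:

    // A destructuring default kicks in when the array has no element at
    // that position, which is exactly the zero-batch case above.
    const fallback = { buffers: [] as unknown[] };
    const [result = fallback] = [] as (typeof fallback)[];
    console.log(result === fallback); // true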
diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts
index c32ecba..114849c 100644
--- a/js/test/generate-test-data.ts
+++ b/js/test/generate-test-data.ts
@@ -152,7 +152,7 @@ export type GeneratedVector<TVec extends Vector = Vector> = {
     values: () => (TVec['TValue'] | null)[];
 };
 
-export const table = (lengths = [100], schema: Schema = new Schema(defaultRecordBatchChildren.slice())): GeneratedTable => {
+export const table = (lengths = [100], schema: Schema = new Schema(defaultRecordBatchChildren.slice(), new Map([['foo', 'bar']]))): GeneratedTable => {
     const generated = lengths.map((length) => recordBatch(length, schema));
     const rowBatches = generated.map(({ rows }) => rows);
     const colBatches = generated.map(({ cols }) => cols);
diff --git a/js/test/jest-extensions.ts b/js/test/jest-extensions.ts
index 75e0695..d87420d 100644
--- a/js/test/jest-extensions.ts
+++ b/js/test/jest-extensions.ts
@@ -55,6 +55,7 @@ function toEqualTable(this: jest.MatcherUtils, actual: Table, expected: Table) {
     const failures = [] as string[];
     try { expect(actual.length).toEqual(expected.length); } catch (e) { failures.push(`${e}`); }
     try { expect(actual.numCols).toEqual(expected.numCols); } catch (e) { failures.push(`${e}`); }
+    try { expect(actual.schema.metadata).toEqual(expected.schema.metadata); } catch (e) { failures.push(`${e}`); }
     (() => {
         for (let i = -1, n = actual.numCols; ++i < n;) {
             const v1 = actual.getColumnAt(i);
diff --git a/js/test/unit/table/serialize-tests.ts b/js/test/unit/table/serialize-tests.ts
index 1e521aa..a4a2df2 100644
--- a/js/test/unit/table/serialize-tests.ts
+++ b/js/test/unit/table/serialize-tests.ts
@@ -18,18 +18,51 @@
 import '../../jest-extensions';
 import * as generate from '../../generate-test-data';
 import {
-    Table, Schema, Field, DataType, Dictionary, Int32, Float32, Utf8
+    Table, Schema, Field, DataType, Dictionary, Int32, Float32, Utf8, Null
 } from '../../Arrow';
 
 const toSchema = (...xs: [string, DataType][]) => new Schema(xs.map((x) => new Field(...x)));
 const schema1 = toSchema(['a', new Int32()], ['b', new Float32()], ['c', new Dictionary(new Utf8(), new Int32())]);
 const schema2 = toSchema(['d', new Int32()], ['e', new Float32()], ['f', new Utf8()]);
+const nullSchema = new Schema([new Field('null', new Null())]);
+
+schema1.metadata.set('foo', 'bar');
 
 function createTable<T extends { [key: string]: DataType } = any>(schema: Schema<T>, chunkLengths: number[]) {
     return generate.table(chunkLengths, schema).table;
 }
 
 describe('Table#serialize()', () => {
+
+    test(`Table#empty round-trips through serialization`, () => {
+        const source = Table.empty();
+        source.schema.metadata.set('foo', 'bar');
+        expect(source.length).toBe(0);
+        expect(source.numCols).toBe(0);
+        const result = Table.from(source.serialize());
+        expect(result).toEqualTable(source);
+        expect(result.schema.metadata.get('foo')).toEqual('bar');
+    });
+
+    test(`Schema metadata round-trips through serialization`, () => {
+        const source = createTable(schema1, [20]);
+        expect(source.length).toBe(20);
+        expect(source.numCols).toBe(3);
+        const result = Table.from(source.serialize());
+        expect(result).toEqualTable(source);
+        expect(result.schema.metadata.get('foo')).toEqual('bar');
+    });
+
+    test(`Table#assign an empty Table to a Table with a zero-length Null column round-trips through serialization`, () => {
+        const table1 = new Table(nullSchema);
+        const table2 = Table.empty();
+        const source = table1.assign(table2);
+        expect(source.length).toBe(0);
+        expect(source.numCols).toBe(1);
+        const result = Table.from(source.serialize());
+        expect(result).toEqualTable(source);
+    });
+
     const chunkLengths = [] as number[];
     for (let i = -1; ++i < 3;) {
         chunkLengths[i] = (Math.random() * 100) | 0;
@@ -51,6 +84,26 @@ describe('Table#serialize()', () => {
             expect(source.numCols).toBe(6);
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
+            expect(result.schema.metadata.get('foo')).toEqual('bar');
+        });
+        test(`Table#assign with an empty table round-trips through serialization`, () => {
+            const table1 = table(schema1);
+            const source = table1.assign(Table.empty());
+            expect(source.numCols).toBe(table1.numCols);
+            expect(source.length).toBe(table1.length);
+            const result = Table.from(source.serialize());
+            expect(result).toEqualTable(source);
+            expect(result.schema.metadata.get('foo')).toEqual('bar');
+        });
+        test(`Table#assign with a zero-length Null column round-trips through serialization`, () => {
+            const table1 = new Table(nullSchema);
+            const table2 = table(schema1);
+            const source = table1.assign(table2);
+            expect(source.length).toBe(table2.length);
+            expect(source.numCols).toBe(4);
+            const result = Table.from(source.serialize());
+            expect(result).toEqualTable(source);
+            expect(result.schema.metadata.get('foo')).toEqual('bar');
         });
         test(`Table#assign with different lengths and number of chunks round-trips through serialization`, () => {
             const table1 = table(schema1);
@@ -60,6 +113,7 @@ describe('Table#serialize()', () => {
             expect(source.length).toBe(Math.max(table1.length, table2.length));
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
+            expect(result.schema.metadata.get('foo')).toEqual('bar');
         });
         test(`Table#select with Table#assign the result of Table#selectAt round-trips through serialization`, () => {
             const table1 = table(schema1);
@@ -68,6 +122,7 @@ describe('Table#serialize()', () => {
             expect(source.numCols).toBe(3);
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
+            expect(result.schema.metadata.get('foo')).toEqual('bar');
         });
         test(`Table#slice round-trips through serialization`, () => {
             const table1 = table(schema1);
@@ -78,6 +133,7 @@ describe('Table#serialize()', () => {
             expect(source.length).toBe(end - begin);
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
+            expect(result.schema.metadata.get('foo')).toEqual('bar');
         });
         test(`Table#concat of two slices round-trips through serialization`, () => {
             const table1 = table(schema1);
@@ -93,6 +149,7 @@ describe('Table#serialize()', () => {
             [slice1, slice2, source].forEach((x) => expect(x.numCols).toBe(3));
             const result = Table.from(source.serialize());
             expect(result).toEqualTable(source);
+            expect(result.schema.metadata.get('foo')).toEqual('bar');
         });
     }
 });