You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bh...@apache.org on 2019/02/13 03:55:30 UTC

[arrow] branch master updated: ARROW-4523, ARROW-4524: [JS] Add row proxy generation benchmark, improve performance

This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 75b3166  ARROW-4523,ARROW-4524: [JS] Add row proxy generation benchmark, improve performance
75b3166 is described below

commit 75b31660819d30f9d579530123806430ca359494
Author: Brian Hulette <hu...@gmail.com>
AuthorDate: Tue Feb 12 19:55:15 2019 -0800

    ARROW-4523,ARROW-4524: [JS] Add row proxy generation benchmark, improve performance
    
    It seems that using `Object.create` in the `bind` method slows down proxy generation substantially. I separated the `Row` and `RowProxyGenerator` concepts - now the generator creates a prototype based on `Row` and the parent vector when it is created, and constructs instances based on that prototype repeatedly in `bind`.
    
    This change improved the included benchmark from 1,114.78 ms to 154.96 ms on my laptop (Intel i5-7200U).
    
    Author: Brian Hulette <hu...@gmail.com>
    
    Closes #3601 from TheNeuralBit/proxy-bench and squashes the following commits:
    
    24425452 <Brian Hulette> Remove inner class, don't re-define columns
    7bb6e4f2 <Brian Hulette> Update comparator, switch to Symbols for internal variables in Row
    4079439e <Brian Hulette> linter fixes
    da4d97c2 <Brian Hulette> Switch back to Object.create with no extra property descriptor
    8a5d1629 <Brian Hulette> Improve Row proxy performance
    fb7a0f04 <Brian Hulette> add row proxy benchmark
---
 js/perf/index.js        |  11 ++++
 js/src/util/vector.ts   | 137 ++++++++++++++++++++++++++++++++----------------
 js/src/vector/map.ts    |   8 +--
 js/src/vector/row.ts    | 123 ++++++++++++++++++++++++-------------------
 js/src/vector/struct.ts |   8 +--
 js/src/visitor/get.ts   |   2 +-
 6 files changed, 179 insertions(+), 110 deletions(-)

diff --git a/js/perf/index.js b/js/perf/index.js
index 0e9c2bd..2129b9d 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -44,10 +44,12 @@ for (let { name, buffers } of require('./table_config')) {
 for (let {name, buffers, countBys, counts} of require('./table_config')) {
     const table = Table.from(buffers);
 
+    const tableIterateSuiteName = `Table Iterate "${name}"`;
     const dfCountBySuiteName = `DataFrame Count By "${name}"`;
     const dfFilterCountSuiteName = `DataFrame Filter-Scan Count "${name}"`;
     const dfDirectCountSuiteName = `DataFrame Direct Count "${name}"`;
 
+    suites.push(createTestSuite(tableIterateSuiteName, createTableIterateTest(table)));
     suites.push(...countBys.map((countBy) => createTestSuite(dfCountBySuiteName, createDataFrameCountByTest(table, countBy))));
     suites.push(...counts.map(({ col, test, value }) => createTestSuite(dfFilterCountSuiteName, createDataFrameFilterCountTest(table, col, test, value))));
     suites.push(...counts.map(({ col, test, value }) => createTestSuite(dfDirectCountSuiteName, createDataFrameDirectCountTest(table, col, test, value))));
@@ -135,6 +137,15 @@ function createGetByIndexTest(vector, name) {
     };
 }
 
+function createTableIterateTest(table) {
+    let value;
+    return {
+        async: true,
+        name: `length: ${table.length}\n`,
+        fn() { for (value of table) {} }
+    };
+}
+
 function createDataFrameDirectCountTest(table, column, test, value) {
     let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column);
 
diff --git a/js/src/util/vector.ts b/js/src/util/vector.ts
index 92f348d..8acf42e 100644
--- a/js/src/util/vector.ts
+++ b/js/src/util/vector.ts
@@ -16,7 +16,7 @@
 // under the License.
 
 import { Vector } from '../vector';
-import { Row } from '../vector/row';
+import { Row, kLength } from '../vector/row';
 import { compareArrayLike } from '../util/buffer';
 
 /** @ignore */
@@ -75,60 +75,105 @@ export function createElementComparator(search: any) {
     }
     // Compare Array-likes
     if (Array.isArray(search)) {
-        const n = (search as any).length;
-        const fns = [] as ((x: any) => boolean)[];
-        for (let i = -1; ++i < n;) {
-            fns[i] = createElementComparator((search as any)[i]);
-        }
-        return (value: any) => {
-            if (!value || value.length !== n) { return false; }
-            // Handle the case where the search element is an Array, but the
-            // values are Rows or Vectors, e.g. list.indexOf(['foo', 'bar'])
-            if ((value instanceof Row) || (value instanceof Vector)) {
-                for (let i = -1, n = value.length; ++i < n;) {
-                    if (!(fns[i]((value as any).get(i)))) { return false; }
-                }
-                return true;
-            }
-            for (let i = -1, n = value.length; ++i < n;) {
-                if (!(fns[i](value[i]))) { return false; }
+        return createArrayLikeComparator(search);
+    }
+    // Compare Rows
+    if (search instanceof Row) {
+        return createRowComparator(search);
+    }
+    // Compare Vectors
+    if (search instanceof Vector) {
+        return createVectorComparator(search);
+    }
+    // Compare non-empty Objects
+    const keys = Object.keys(search);
+    if (keys.length > 0) {
+        return createObjectKeysComparator(search, keys);
+    }
+    // No valid comparator
+    return () => false;
+}
+
+/** @ignore */
+function createArrayLikeComparator(search: ArrayLike<any>) {
+    const n = search.length;
+    const fns = [] as ((x: any) => boolean)[];
+    for (let i = -1; ++i < n;) {
+        fns[i] = createElementComparator((search as any)[i]);
+    }
+    return (value: any) => {
+        if (!value) { return false; }
+        // Handle the case where the search element is an Array, but the
+        // values are Rows or Vectors, e.g. list.indexOf(['foo', 'bar'])
+        if (value instanceof Row) {
+            if (value[kLength] !== n) { return false; }
+            for (let i = -1; ++i < n;) {
+                if (!(fns[i](value.get(i)))) { return false; }
             }
             return true;
-        };
-    }
-    // Compare Rows and Vectors
-    if ((search instanceof Row) || (search instanceof Vector)) {
-        const n = search.length;
-        const C = search.constructor as any;
-        const fns = [] as ((x: any) => boolean)[];
-        for (let i = -1; ++i < n;) {
-            fns[i] = createElementComparator((search as any).get(i));
         }
-        return (value: any) => {
-            if (!(value instanceof C)) { return false; }
-            if (!(value.length === n)) { return false; }
+        if (value.length !== n) { return false; }
+        if (value instanceof Vector) {
             for (let i = -1; ++i < n;) {
                 if (!(fns[i](value.get(i)))) { return false; }
             }
             return true;
-        };
+        }
+        for (let i = -1; ++i < n;) {
+            if (!(fns[i](value[i]))) { return false; }
+        }
+        return true;
+    };
+}
+
+/** @ignore */
+function createRowComparator(search: Row<any>) {
+    const n = search[kLength];
+    const C = search.constructor as any;
+    const fns = [] as ((x: any) => boolean)[];
+    for (let i = -1; ++i < n;) {
+        fns[i] = createElementComparator(search.get(i));
     }
-    // Compare non-empty Objects
-    const keys = Object.keys(search);
-    if (keys.length > 0) {
-        const n = keys.length;
-        const fns = [] as ((x: any) => boolean)[];
+    return (value: any) => {
+        if (!(value instanceof C)) { return false; }
+        if (!(value[kLength] === n)) { return false; }
         for (let i = -1; ++i < n;) {
-            fns[i] = createElementComparator(search[keys[i]]);
+            if (!(fns[i](value.get(i)))) { return false; }
         }
-        return (value: any) => {
-            if (!value || typeof value !== 'object') { return false; }
-            for (let i = -1; ++i < n;) {
-                if (!(fns[i](value[keys[i]]))) { return false; }
-            }
-            return true;
-        };
+        return true;
+    };
+}
+
+/** @ignore */
+function createVectorComparator(search: Vector<any>) {
+    const n = search.length;
+    const C = search.constructor as any;
+    const fns = [] as ((x: any) => boolean)[];
+    for (let i = -1; ++i < n;) {
+        fns[i] = createElementComparator((search as any).get(i));
     }
-    // No valid comparator
-    return () => false;
+    return (value: any) => {
+        if (!(value instanceof C)) { return false; }
+        if (!(value.length === n)) { return false; }
+        for (let i = -1; ++i < n;) {
+            if (!(fns[i](value.get(i)))) { return false; }
+        }
+        return true;
+    };
+}
+
+/** @ignore */
+function createObjectKeysComparator(search: any, keys: string[]) {
+    const n = keys.length;
+    const fns = [] as ((x: any) => boolean)[];
+    for (let i = -1; ++i < n;) {
+        fns[i] = createElementComparator(search[keys[i]]);
+    }
+    return (value: any) => {
+        if (!value || typeof value !== 'object') { return false; }
+        for (let i = -1; ++i < n;) {
+            if (!(fns[i](value[keys[i]]))) { return false; }
+        }
+        return true;
+    };
 }
diff --git a/js/src/vector/map.ts b/js/src/vector/map.ts
index 27c51a6..54956d7 100644
--- a/js/src/vector/map.ts
+++ b/js/src/vector/map.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Row } from './row';
+import { RowProxyGenerator } from './row';
 import { Vector } from '../vector';
 import { BaseVector } from './base';
 import { DataType, Map_, Struct } from '../type';
@@ -25,8 +25,8 @@ export class MapVector<T extends { [key: string]: DataType } = any> extends Base
         return Vector.new(this.data.clone(new Struct<T>(this.type.children)));
     }
     // @ts-ignore
-    private _rowProxy: Row<T>;
-    public get rowProxy(): Row<T> {
-        return this._rowProxy || (this._rowProxy = Row.new<T>(this.type.children || [], true));
+    private _rowProxy: RowProxyGenerator<T>;
+    public get rowProxy(): RowProxyGenerator<T> {
+        return this._rowProxy || (this._rowProxy = RowProxyGenerator.new<T>(this, this.type.children || [], true));
     }
 }
diff --git a/js/src/vector/row.ts b/js/src/vector/row.ts
index 62fb3b6..54dcd7f 100644
--- a/js/src/vector/row.ts
+++ b/js/src/vector/row.ts
@@ -17,84 +17,97 @@
 
 import { Field } from '../schema';
 import { MapVector } from '../vector/map';
-import { DataType, RowLike } from '../type';
+import { DataType } from '../type';
 import { valueToString } from '../util/pretty';
 import { StructVector } from '../vector/struct';
 
-/** @ignore */ const columnDescriptor = { enumerable: true, configurable: false, get: () => {} };
-/** @ignore */ const lengthDescriptor = { writable: false, enumerable: false, configurable: false, value: -1 };
-/** @ignore */ const rowIndexDescriptor = { writable: false, enumerable: false, configurable: true, value: null as any };
+/** @ignore */ export const kLength = Symbol.for('length');
+/** @ignore */ export const kParent = Symbol.for('parent');
+/** @ignore */ export const kRowIndex = Symbol.for('rowIndex');
+/** @ignore */ const columnDescriptor = { enumerable: true, configurable: false, get: null as any };
+/** @ignore */ const rowLengthDescriptor = { writable: false, enumerable: false, configurable: false, value: -1 };
 /** @ignore */ const rowParentDescriptor = { writable: false, enumerable: false, configurable: false, value: null as any };
-/** @ignore */ const row = { parent: rowParentDescriptor, rowIndex: rowIndexDescriptor };
 
-/** @ignore */
 export class Row<T extends { [key: string]: DataType }> implements Iterable<T[keyof T]['TValue']> {
     [key: string]: T[keyof T]['TValue'];
-    /** @nocollapse */
-    public static new<T extends { [key: string]: DataType }>(schemaOrFields: T | Field[], fieldsAreEnumerable = false): RowLike<T> & Row<T> {
-        let schema: T, fields: Field[];
-        if (Array.isArray(schemaOrFields)) {
-            fields = schemaOrFields;
-        } else {
-            schema = schemaOrFields;
-            fieldsAreEnumerable = true;
-            fields = Object.keys(schema).map((x) => new Field(x, schema[x]));
-        }
-        return new Row<T>(fields, fieldsAreEnumerable) as RowLike<T> & Row<T>;
-    }
     // @ts-ignore
-    private parent: TParent;
+    public [kParent]: MapVector<T> | StructVector<T>;
     // @ts-ignore
-    private rowIndex: number;
+    public [kRowIndex]: number;
     // @ts-ignore
-    public readonly length: number;
-    private constructor(fields: Field[], fieldsAreEnumerable: boolean) {
-        lengthDescriptor.value = fields.length;
-        Object.defineProperty(this, 'length', lengthDescriptor);
-        fields.forEach((field, columnIndex) => {
-            columnDescriptor.get = this._bindGetter(columnIndex);
-            // set configurable to true to ensure Object.defineProperty
-            // doesn't throw in the case of duplicate column names
-            columnDescriptor.configurable = true;
-            columnDescriptor.enumerable = fieldsAreEnumerable;
-            Object.defineProperty(this, field.name, columnDescriptor);
-            columnDescriptor.configurable = false;
-            columnDescriptor.enumerable = !fieldsAreEnumerable;
-            Object.defineProperty(this, columnIndex, columnDescriptor);
-            columnDescriptor.get = null as any;
-        });
-    }
-    *[Symbol.iterator](this: RowLike<T>) {
-        for (let i = -1, n = this.length; ++i < n;) {
+    public readonly [kLength]: number;
+    *[Symbol.iterator]() {
+        for (let i = -1, n = this[kLength]; ++i < n;) {
             yield this[i];
         }
     }
-    private _bindGetter(colIndex: number) {
-        return function (this: Row<T>) {
-            let child = this.parent.getChildAt(colIndex);
-            return child ? child.get(this.rowIndex) : null;
-        };
-    }
     public get<K extends keyof T>(key: K) { return (this as any)[key] as T[K]['TValue']; }
-    public bind<TParent extends MapVector<T> | StructVector<T>>(parent: TParent, rowIndex: number) {
-        rowIndexDescriptor.value = rowIndex;
-        rowParentDescriptor.value = parent;
-        const bound = Object.create(this, row);
-        rowIndexDescriptor.value = null;
-        rowParentDescriptor.value = null;
-        return bound as RowLike<T>;
-    }
     public toJSON(): any {
-        return DataType.isStruct(this.parent.type) ? [...this] :
+        return DataType.isStruct(this[kParent].type) ? [...this] :
             Object.getOwnPropertyNames(this).reduce((props: any, prop: string) => {
                 return (props[prop] = (this as any)[prop]) && props || props;
             }, {});
     }
     public toString() {
-        return DataType.isStruct(this.parent.type) ?
+        return DataType.isStruct(this[kParent].type) ?
             [...this].map((x) => valueToString(x)).join(', ') :
             Object.getOwnPropertyNames(this).reduce((props: any, prop: string) => {
                 return (props[prop] = valueToString((this as any)[prop])) && props || props;
             }, {});
     }
 }
+
+/** @ignore */
+export class RowProxyGenerator<T extends { [key: string]: DataType }> {
+    /** @nocollapse */
+    public static new<T extends { [key: string]: DataType }>(parent: MapVector<T> | StructVector<T>, schemaOrFields: T | Field[], fieldsAreEnumerable = false): RowProxyGenerator<T> {
+        let schema: T, fields: Field[];
+        if (Array.isArray(schemaOrFields)) {
+            fields = schemaOrFields;
+        } else {
+            schema = schemaOrFields;
+            fieldsAreEnumerable = true;
+            fields = Object.keys(schema).map((x) => new Field(x, schema[x]));
+        }
+        return new RowProxyGenerator<T>(parent, fields, fieldsAreEnumerable);
+    }
+
+    private rowPrototype: Row<T>;
+
+    private constructor(parent: MapVector<T> | StructVector<T>, fields: Field[], fieldsAreEnumerable: boolean) {
+        const proto = Object.create(Row.prototype);
+
+        rowParentDescriptor.value = parent;
+        rowLengthDescriptor.value = fields.length;
+        Object.defineProperty(proto, kParent, rowParentDescriptor);
+        Object.defineProperty(proto, kLength, rowLengthDescriptor);
+        fields.forEach((field, columnIndex) => {
+            if (!proto.hasOwnProperty(field.name)) {
+                columnDescriptor.enumerable = fieldsAreEnumerable;
+                columnDescriptor.get || (columnDescriptor.get = this._bindGetter(columnIndex));
+                Object.defineProperty(proto, field.name, columnDescriptor);
+            }
+            if (!proto.hasOwnProperty(columnIndex)) {
+                columnDescriptor.enumerable = !fieldsAreEnumerable;
+                columnDescriptor.get || (columnDescriptor.get = this._bindGetter(columnIndex));
+                Object.defineProperty(proto, columnIndex, columnDescriptor);
+            }
+            columnDescriptor.get = null as any;
+        });
+
+        this.rowPrototype = proto;
+    }
+
+    private _bindGetter(columnIndex: number) {
+        return function(this: Row<T>) {
+            const child = this[kParent].getChildAt(columnIndex);
+            return child ? child.get(this[kRowIndex]) : null;
+        };
+    }
+    public bind(rowIndex: number) {
+        const bound = Object.create(this.rowPrototype);
+        bound[kRowIndex] = rowIndex;
+        return bound;
+        //return new this.RowProxy(rowIndex);
+    }
+}
diff --git a/js/src/vector/struct.ts b/js/src/vector/struct.ts
index 4ad57ff..e1596d6 100644
--- a/js/src/vector/struct.ts
+++ b/js/src/vector/struct.ts
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Row } from './row';
+import { RowProxyGenerator } from './row';
 import { Vector } from '../vector';
 import { BaseVector } from './base';
 import { DataType, Map_, Struct } from '../type';
@@ -25,8 +25,8 @@ export class StructVector<T extends { [key: string]: DataType } = any> extends B
         return Vector.new(this.data.clone(new Map_<T>(this.type.children, keysSorted)));
     }
     // @ts-ignore
-    private _rowProxy: Row<T>;
-    public get rowProxy(): Row<T> {
-        return this._rowProxy || (this._rowProxy = Row.new<T>(this.type.children || [], false));
+    private _rowProxy: RowProxyGenerator<T>;
+    public get rowProxy(): RowProxyGenerator<T> {
+        return this._rowProxy || (this._rowProxy = RowProxyGenerator.new<T>(this, this.type.children || [], false));
     }
 }
diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts
index 67909ea..4aa134b 100644
--- a/js/src/visitor/get.ts
+++ b/js/src/visitor/get.ts
@@ -211,7 +211,7 @@ const getNested = <
     S extends { [key: string]: DataType },
     V extends Vector<Map_<S>> | Vector<Struct<S>>
 >(vector: V, index: number): V['TValue'] => {
-    return vector.rowProxy.bind(vector, index);
+    return vector.rowProxy.bind(index);
 };
 
 /* istanbul ignore next */