You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bh...@apache.org on 2019/02/13 03:55:30 UTC
[arrow] branch master updated: ARROW-4523,
ARROW-4524: [JS] Add row proxy generation benchmark, improve performance
This is an automated email from the ASF dual-hosted git repository.
bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 75b3166 ARROW-4523,ARROW-4524: [JS] Add row proxy generation benchmark, improve performance
75b3166 is described below
commit 75b31660819d30f9d579530123806430ca359494
Author: Brian Hulette <hu...@gmail.com>
AuthorDate: Tue Feb 12 19:55:15 2019 -0800
ARROW-4523,ARROW-4524: [JS] Add row proxy generation benchmark, improve performance
It seems that using `Object.create` in the `bind` method slows down proxy generation substantially. I separated the `Row` and `RowProxyGenerator` concepts - now the generator creates a prototype based on `Row` and the parent vector when it is created, and constructs instances based on that prototype repeatedly in `bind`.
This change improved the included benchmark from 1,114.78 ms to 154.96 ms on my laptop (Intel i5-7200U).
Author: Brian Hulette <hu...@gmail.com>
Closes #3601 from TheNeuralBit/proxy-bench and squashes the following commits:
24425452 <Brian Hulette> Remove inner class, don't re-define columns
7bb6e4f2 <Brian Hulette> Update comparator, switch to Symbols for internal variables in Row
4079439e <Brian Hulette> linter fixes
da4d97c2 <Brian Hulette> Switch back to Object.create with no extra property descriptor
8a5d1629 <Brian Hulette> Improve Row proxy performance
fb7a0f04 <Brian Hulette> add row proxy benchmark
---
js/perf/index.js | 11 ++++
js/src/util/vector.ts | 137 ++++++++++++++++++++++++++++++++----------------
js/src/vector/map.ts | 8 +--
js/src/vector/row.ts | 123 ++++++++++++++++++++++++-------------------
js/src/vector/struct.ts | 8 +--
js/src/visitor/get.ts | 2 +-
6 files changed, 179 insertions(+), 110 deletions(-)
diff --git a/js/perf/index.js b/js/perf/index.js
index 0e9c2bd..2129b9d 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -44,10 +44,12 @@ for (let { name, buffers } of require('./table_config')) {
for (let {name, buffers, countBys, counts} of require('./table_config')) {
const table = Table.from(buffers);
+ const tableIterateSuiteName = `Table Iterate "${name}"`;
const dfCountBySuiteName = `DataFrame Count By "${name}"`;
const dfFilterCountSuiteName = `DataFrame Filter-Scan Count "${name}"`;
const dfDirectCountSuiteName = `DataFrame Direct Count "${name}"`;
+ suites.push(createTestSuite(tableIterateSuiteName, createTableIterateTest(table)));
suites.push(...countBys.map((countBy) => createTestSuite(dfCountBySuiteName, createDataFrameCountByTest(table, countBy))));
suites.push(...counts.map(({ col, test, value }) => createTestSuite(dfFilterCountSuiteName, createDataFrameFilterCountTest(table, col, test, value))));
suites.push(...counts.map(({ col, test, value }) => createTestSuite(dfDirectCountSuiteName, createDataFrameDirectCountTest(table, col, test, value))));
@@ -135,6 +137,15 @@ function createGetByIndexTest(vector, name) {
};
}
+function createTableIterateTest(table) {
+ let value;
+ return {
+ async: true,
+ name: `length: ${table.length}\n`,
+ fn() { for (value of table) {} }
+ };
+}
+
function createDataFrameDirectCountTest(table, column, test, value) {
let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column);
diff --git a/js/src/util/vector.ts b/js/src/util/vector.ts
index 92f348d..8acf42e 100644
--- a/js/src/util/vector.ts
+++ b/js/src/util/vector.ts
@@ -16,7 +16,7 @@
// under the License.
import { Vector } from '../vector';
-import { Row } from '../vector/row';
+import { Row, kLength } from '../vector/row';
import { compareArrayLike } from '../util/buffer';
/** @ignore */
@@ -75,60 +75,105 @@ export function createElementComparator(search: any) {
}
// Compare Array-likes
if (Array.isArray(search)) {
- const n = (search as any).length;
- const fns = [] as ((x: any) => boolean)[];
- for (let i = -1; ++i < n;) {
- fns[i] = createElementComparator((search as any)[i]);
- }
- return (value: any) => {
- if (!value || value.length !== n) { return false; }
- // Handle the case where the search element is an Array, but the
- // values are Rows or Vectors, e.g. list.indexOf(['foo', 'bar'])
- if ((value instanceof Row) || (value instanceof Vector)) {
- for (let i = -1, n = value.length; ++i < n;) {
- if (!(fns[i]((value as any).get(i)))) { return false; }
- }
- return true;
- }
- for (let i = -1, n = value.length; ++i < n;) {
- if (!(fns[i](value[i]))) { return false; }
+ return createArrayLikeComparator(search);
+ }
+ // Compare Rows
+ if (search instanceof Row) {
+ return createRowComparator(search);
+ }
+ // Compare Vectors
+ if (search instanceof Vector) {
+ return createVectorComparator(search);
+ }
+ // Compare non-empty Objects
+ const keys = Object.keys(search);
+ if (keys.length > 0) {
+ return createObjectKeysComparator(search, keys);
+ }
+ // No valid comparator
+ return () => false;
+}
+
+/** @ignore */
+function createArrayLikeComparator(search: ArrayLike<any>) {
+ const n = search.length;
+ const fns = [] as ((x: any) => boolean)[];
+ for (let i = -1; ++i < n;) {
+ fns[i] = createElementComparator((search as any)[i]);
+ }
+ return (value: any) => {
+ if (!value) { return false; }
+ // Handle the case where the search element is an Array, but the
+ // values are Rows or Vectors, e.g. list.indexOf(['foo', 'bar'])
+ if (value instanceof Row) {
+ if (value[kLength] !== n) { return false; }
+ for (let i = -1; ++i < n;) {
+ if (!(fns[i](value.get(i)))) { return false; }
}
return true;
- };
- }
- // Compare Rows and Vectors
- if ((search instanceof Row) || (search instanceof Vector)) {
- const n = search.length;
- const C = search.constructor as any;
- const fns = [] as ((x: any) => boolean)[];
- for (let i = -1; ++i < n;) {
- fns[i] = createElementComparator((search as any).get(i));
}
- return (value: any) => {
- if (!(value instanceof C)) { return false; }
- if (!(value.length === n)) { return false; }
+ if (value.length !== n) { return false; }
+ if (value instanceof Vector) {
for (let i = -1; ++i < n;) {
if (!(fns[i](value.get(i)))) { return false; }
}
return true;
- };
+ }
+ for (let i = -1; ++i < n;) {
+ if (!(fns[i](value[i]))) { return false; }
+ }
+ return true;
+ };
+}
+
+/** @ignore */
+function createRowComparator(search: Row<any>) {
+ const n = search[kLength];
+ const C = search.constructor as any;
+ const fns = [] as ((x: any) => boolean)[];
+ for (let i = -1; ++i < n;) {
+ fns[i] = createElementComparator(search.get(i));
}
- // Compare non-empty Objects
- const keys = Object.keys(search);
- if (keys.length > 0) {
- const n = keys.length;
- const fns = [] as ((x: any) => boolean)[];
+ return (value: any) => {
+ if (!(value instanceof C)) { return false; }
+ if (!(value[kLength] === n)) { return false; }
for (let i = -1; ++i < n;) {
- fns[i] = createElementComparator(search[keys[i]]);
+ if (!(fns[i](value.get(i)))) { return false; }
}
- return (value: any) => {
- if (!value || typeof value !== 'object') { return false; }
- for (let i = -1; ++i < n;) {
- if (!(fns[i](value[keys[i]]))) { return false; }
- }
- return true;
- };
+ return true;
+ };
+}
+
+/** @ignore */
+function createVectorComparator(search: Vector<any>) {
+ const n = search.length;
+ const C = search.constructor as any;
+ const fns = [] as ((x: any) => boolean)[];
+ for (let i = -1; ++i < n;) {
+ fns[i] = createElementComparator((search as any).get(i));
}
- // No valid comparator
- return () => false;
+ return (value: any) => {
+ if (!(value instanceof C)) { return false; }
+ if (!(value.length === n)) { return false; }
+ for (let i = -1; ++i < n;) {
+ if (!(fns[i](value.get(i)))) { return false; }
+ }
+ return true;
+ };
+}
+
+/** @ignore */
+function createObjectKeysComparator(search: any, keys: string[]) {
+ const n = keys.length;
+ const fns = [] as ((x: any) => boolean)[];
+ for (let i = -1; ++i < n;) {
+ fns[i] = createElementComparator(search[keys[i]]);
+ }
+ return (value: any) => {
+ if (!value || typeof value !== 'object') { return false; }
+ for (let i = -1; ++i < n;) {
+ if (!(fns[i](value[keys[i]]))) { return false; }
+ }
+ return true;
+ };
}
diff --git a/js/src/vector/map.ts b/js/src/vector/map.ts
index 27c51a6..54956d7 100644
--- a/js/src/vector/map.ts
+++ b/js/src/vector/map.ts
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-import { Row } from './row';
+import { RowProxyGenerator } from './row';
import { Vector } from '../vector';
import { BaseVector } from './base';
import { DataType, Map_, Struct } from '../type';
@@ -25,8 +25,8 @@ export class MapVector<T extends { [key: string]: DataType } = any> extends Base
return Vector.new(this.data.clone(new Struct<T>(this.type.children)));
}
// @ts-ignore
- private _rowProxy: Row<T>;
- public get rowProxy(): Row<T> {
- return this._rowProxy || (this._rowProxy = Row.new<T>(this.type.children || [], true));
+ private _rowProxy: RowProxyGenerator<T>;
+ public get rowProxy(): RowProxyGenerator<T> {
+ return this._rowProxy || (this._rowProxy = RowProxyGenerator.new<T>(this, this.type.children || [], true));
}
}
diff --git a/js/src/vector/row.ts b/js/src/vector/row.ts
index 62fb3b6..54dcd7f 100644
--- a/js/src/vector/row.ts
+++ b/js/src/vector/row.ts
@@ -17,84 +17,97 @@
import { Field } from '../schema';
import { MapVector } from '../vector/map';
-import { DataType, RowLike } from '../type';
+import { DataType } from '../type';
import { valueToString } from '../util/pretty';
import { StructVector } from '../vector/struct';
-/** @ignore */ const columnDescriptor = { enumerable: true, configurable: false, get: () => {} };
-/** @ignore */ const lengthDescriptor = { writable: false, enumerable: false, configurable: false, value: -1 };
-/** @ignore */ const rowIndexDescriptor = { writable: false, enumerable: false, configurable: true, value: null as any };
+/** @ignore */ export const kLength = Symbol.for('length');
+/** @ignore */ export const kParent = Symbol.for('parent');
+/** @ignore */ export const kRowIndex = Symbol.for('rowIndex');
+/** @ignore */ const columnDescriptor = { enumerable: true, configurable: false, get: null as any };
+/** @ignore */ const rowLengthDescriptor = { writable: false, enumerable: false, configurable: false, value: -1 };
/** @ignore */ const rowParentDescriptor = { writable: false, enumerable: false, configurable: false, value: null as any };
-/** @ignore */ const row = { parent: rowParentDescriptor, rowIndex: rowIndexDescriptor };
-/** @ignore */
export class Row<T extends { [key: string]: DataType }> implements Iterable<T[keyof T]['TValue']> {
[key: string]: T[keyof T]['TValue'];
- /** @nocollapse */
- public static new<T extends { [key: string]: DataType }>(schemaOrFields: T | Field[], fieldsAreEnumerable = false): RowLike<T> & Row<T> {
- let schema: T, fields: Field[];
- if (Array.isArray(schemaOrFields)) {
- fields = schemaOrFields;
- } else {
- schema = schemaOrFields;
- fieldsAreEnumerable = true;
- fields = Object.keys(schema).map((x) => new Field(x, schema[x]));
- }
- return new Row<T>(fields, fieldsAreEnumerable) as RowLike<T> & Row<T>;
- }
// @ts-ignore
- private parent: TParent;
+ public [kParent]: MapVector<T> | StructVector<T>;
// @ts-ignore
- private rowIndex: number;
+ public [kRowIndex]: number;
// @ts-ignore
- public readonly length: number;
- private constructor(fields: Field[], fieldsAreEnumerable: boolean) {
- lengthDescriptor.value = fields.length;
- Object.defineProperty(this, 'length', lengthDescriptor);
- fields.forEach((field, columnIndex) => {
- columnDescriptor.get = this._bindGetter(columnIndex);
- // set configurable to true to ensure Object.defineProperty
- // doesn't throw in the case of duplicate column names
- columnDescriptor.configurable = true;
- columnDescriptor.enumerable = fieldsAreEnumerable;
- Object.defineProperty(this, field.name, columnDescriptor);
- columnDescriptor.configurable = false;
- columnDescriptor.enumerable = !fieldsAreEnumerable;
- Object.defineProperty(this, columnIndex, columnDescriptor);
- columnDescriptor.get = null as any;
- });
- }
- *[Symbol.iterator](this: RowLike<T>) {
- for (let i = -1, n = this.length; ++i < n;) {
+ public readonly [kLength]: number;
+ *[Symbol.iterator]() {
+ for (let i = -1, n = this[kLength]; ++i < n;) {
yield this[i];
}
}
- private _bindGetter(colIndex: number) {
- return function (this: Row<T>) {
- let child = this.parent.getChildAt(colIndex);
- return child ? child.get(this.rowIndex) : null;
- };
- }
public get<K extends keyof T>(key: K) { return (this as any)[key] as T[K]['TValue']; }
- public bind<TParent extends MapVector<T> | StructVector<T>>(parent: TParent, rowIndex: number) {
- rowIndexDescriptor.value = rowIndex;
- rowParentDescriptor.value = parent;
- const bound = Object.create(this, row);
- rowIndexDescriptor.value = null;
- rowParentDescriptor.value = null;
- return bound as RowLike<T>;
- }
public toJSON(): any {
- return DataType.isStruct(this.parent.type) ? [...this] :
+ return DataType.isStruct(this[kParent].type) ? [...this] :
Object.getOwnPropertyNames(this).reduce((props: any, prop: string) => {
return (props[prop] = (this as any)[prop]) && props || props;
}, {});
}
public toString() {
- return DataType.isStruct(this.parent.type) ?
+ return DataType.isStruct(this[kParent].type) ?
[...this].map((x) => valueToString(x)).join(', ') :
Object.getOwnPropertyNames(this).reduce((props: any, prop: string) => {
return (props[prop] = valueToString((this as any)[prop])) && props || props;
}, {});
}
}
+
+/** @ignore */
+export class RowProxyGenerator<T extends { [key: string]: DataType }> {
+ /** @nocollapse */
+ public static new<T extends { [key: string]: DataType }>(parent: MapVector<T> | StructVector<T>, schemaOrFields: T | Field[], fieldsAreEnumerable = false): RowProxyGenerator<T> {
+ let schema: T, fields: Field[];
+ if (Array.isArray(schemaOrFields)) {
+ fields = schemaOrFields;
+ } else {
+ schema = schemaOrFields;
+ fieldsAreEnumerable = true;
+ fields = Object.keys(schema).map((x) => new Field(x, schema[x]));
+ }
+ return new RowProxyGenerator<T>(parent, fields, fieldsAreEnumerable);
+ }
+
+ private rowPrototype: Row<T>;
+
+ private constructor(parent: MapVector<T> | StructVector<T>, fields: Field[], fieldsAreEnumerable: boolean) {
+ const proto = Object.create(Row.prototype);
+
+ rowParentDescriptor.value = parent;
+ rowLengthDescriptor.value = fields.length;
+ Object.defineProperty(proto, kParent, rowParentDescriptor);
+ Object.defineProperty(proto, kLength, rowLengthDescriptor);
+ fields.forEach((field, columnIndex) => {
+ if (!proto.hasOwnProperty(field.name)) {
+ columnDescriptor.enumerable = fieldsAreEnumerable;
+ columnDescriptor.get || (columnDescriptor.get = this._bindGetter(columnIndex));
+ Object.defineProperty(proto, field.name, columnDescriptor);
+ }
+ if (!proto.hasOwnProperty(columnIndex)) {
+ columnDescriptor.enumerable = !fieldsAreEnumerable;
+ columnDescriptor.get || (columnDescriptor.get = this._bindGetter(columnIndex));
+ Object.defineProperty(proto, columnIndex, columnDescriptor);
+ }
+ columnDescriptor.get = null as any;
+ });
+
+ this.rowPrototype = proto;
+ }
+
+ private _bindGetter(columnIndex: number) {
+ return function(this: Row<T>) {
+ const child = this[kParent].getChildAt(columnIndex);
+ return child ? child.get(this[kRowIndex]) : null;
+ };
+ }
+ public bind(rowIndex: number) {
+ const bound = Object.create(this.rowPrototype);
+ bound[kRowIndex] = rowIndex;
+ return bound;
+ //return new this.RowProxy(rowIndex);
+ }
+}
diff --git a/js/src/vector/struct.ts b/js/src/vector/struct.ts
index 4ad57ff..e1596d6 100644
--- a/js/src/vector/struct.ts
+++ b/js/src/vector/struct.ts
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-import { Row } from './row';
+import { RowProxyGenerator } from './row';
import { Vector } from '../vector';
import { BaseVector } from './base';
import { DataType, Map_, Struct } from '../type';
@@ -25,8 +25,8 @@ export class StructVector<T extends { [key: string]: DataType } = any> extends B
return Vector.new(this.data.clone(new Map_<T>(this.type.children, keysSorted)));
}
// @ts-ignore
- private _rowProxy: Row<T>;
- public get rowProxy(): Row<T> {
- return this._rowProxy || (this._rowProxy = Row.new<T>(this.type.children || [], false));
+ private _rowProxy: RowProxyGenerator<T>;
+ public get rowProxy(): RowProxyGenerator<T> {
+ return this._rowProxy || (this._rowProxy = RowProxyGenerator.new<T>(this, this.type.children || [], false));
}
}
diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts
index 67909ea..4aa134b 100644
--- a/js/src/visitor/get.ts
+++ b/js/src/visitor/get.ts
@@ -211,7 +211,7 @@ const getNested = <
S extends { [key: string]: DataType },
V extends Vector<Map_<S>> | Vector<Struct<S>>
>(vector: V, index: number): V['TValue'] => {
- return vector.rowProxy.bind(vector, index);
+ return vector.rowProxy.bind(index);
};
/* istanbul ignore next */