You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/22 01:28:13 UTC
arrow git commit: ARROW-1590: [JS] Flow TS Table method generics
Repository: arrow
Updated Branches:
refs/heads/master 8fd73b48c -> 8996a4ff3
ARROW-1590: [JS] Flow TS Table method generics
This PR fixes the Table generics to infer the types from the call site:
![kapture 2017-09-21 at 4 03 34](https://user-images.githubusercontent.com/178183/30692953-5b8638d6-9e82-11e7-9d66-b87eb50f0e3f.gif)
@wesm this PR also includes the fixes to the prepublish script I mentioned yesterday.
Author: Paul Taylor <pa...@me.com>
Closes #1120 from trxcllnt/fix-ts-typings and squashes the following commits:
73d8eee [Paul Taylor] make package the default gulp task
1d269fe [Paul Taylor] flow table method generics
dd1e819 [Paul Taylor] more defensively typed reader internal values
ac6a778 [Paul Taylor] add comments explaining ARROW-1363 reader workaround
e37f885 [Paul Taylor] fix gulp and prepublish scripts
58fa201 [Paul Taylor] enforce exact dependency package versions
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8996a4ff
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8996a4ff
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8996a4ff
Branch: refs/heads/master
Commit: 8996a4ff3ca006983fb19f07d3530180e65a93d1
Parents: 8fd73b4
Author: Paul Taylor <pa...@me.com>
Authored: Thu Sep 21 21:28:07 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Thu Sep 21 21:28:07 2017 -0400
----------------------------------------------------------------------
js/.gitignore | 3 +-
js/.npmrc | 1 +
js/gulpfile.js | 63 +++++++++++++++-------------
js/package.json | 91 ++++++++++++++++++----------------------
js/prepublish.sh | 6 +--
js/src/reader/dictionary.ts | 11 ++---
js/src/reader/vector.ts | 19 +++++----
js/src/table.ts | 28 +++++++++----
8 files changed, 115 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/.gitignore
----------------------------------------------------------------------
diff --git a/js/.gitignore b/js/.gitignore
index b48f35b..6d0f88d 100644
--- a/js/.gitignore
+++ b/js/.gitignore
@@ -57,7 +57,8 @@ build/Release
node_modules/
jspm_packages/
-# Typescript v1 declaration files
+# Typescript declaration files
+types/
typings/
# Optional npm cache directory
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/.npmrc
----------------------------------------------------------------------
diff --git a/js/.npmrc b/js/.npmrc
index 43c97e7..71ffabd 100644
--- a/js/.npmrc
+++ b/js/.npmrc
@@ -1 +1,2 @@
+save-prefix=
package-lock=false
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/gulpfile.js
----------------------------------------------------------------------
diff --git a/js/gulpfile.js b/js/gulpfile.js
index 9c945aa..9f8e564 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -63,14 +63,17 @@ for (const [target, format] of combinations([`all`, `all`])) {
gulp.task(`clean:${combo}`, gulp.series(cleanTask(target, format, combo, `targets/${target}/${format}`)));
gulp.task(`build:${combo}`, gulp.series(buildTask(target, format, combo, `targets/${target}/${format}`)));
gulp.task(`bundle:${combo}`, gulp.series(bundleTask(target, format, combo, `targets/${target}/${format}`)));
+ gulp.task(`package:${combo}`, gulp.series(packageTask(target, format, combo, `targets/${target}/${format}`)));
gulp.task(`test:debug:${combo}`, gulp.series(testTask(target, format, combo, `targets/${target}/${format}`, true)));
}
gulp.task(`test`, gulp.series(runTaskCombos(`test`)));
gulp.task(`clean`, gulp.parallel(runTaskCombos(`clean`)));
-gulp.task(`build`, gulp.parallel(runTaskCombos(`bundle`)));
+gulp.task(`build`, gulp.parallel(runTaskCombos(`build`)));
+gulp.task(`bundle`, gulp.parallel(runTaskCombos(`bundle`)));
+gulp.task(`package`, gulp.parallel(runTaskCombos(`package`)));
gulp.task(`test:debug`, gulp.series(runTaskCombos(`test:debug`)));
-gulp.task(`default`, gulp.task(`build`));
+gulp.task(`default`, gulp.task(`package`));
function runTaskCombos(name) {
const combos = [];
@@ -87,7 +90,7 @@ function cleanTask(target, format, taskName, outDir) {
return function cleanTask() {
const globs = [`${outDir}/**`];
if (target === `es5` && format === `cjs`) {
- globs.push(`typings`);
+ globs.push(`types`, `typings`);
}
return del(globs);
};
@@ -100,30 +103,32 @@ function buildTask(target, format, taskName, outDir) {
}
function bundleTask(target, format, taskName, outDir) {
- return [
- [`build:${taskName}`],
- function bundleTask() {
- return streamMerge([
- pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError),
- pump(
- gulp.src(`package.json`),
- gulpJsonTransform((orig) => [
- `version`, `description`,
- `author`, `homepage`, `bugs`, `license`,
- `keywords`, `repository`, `peerDependencies`
- ].reduce((copy, key) => (
- (copy[key] = orig[key]) && copy || copy
- ), {
- main: `Arrow.js`,
- typings: `Arrow.d.ts`,
- name: `@apache-arrow/${target}-${format}`
- }), 2),
- gulp.dest(outDir),
- onError
- )
- ])
- }
- ];
+ return function bundleTask() {
+ return streamMerge([
+ pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError),
+ pump(
+ gulp.src(`package.json`),
+ gulpJsonTransform((orig) => [
+ `version`, `description`, `keywords`,
+ `repository`, `author`, `homepage`, `bugs`, `license`,
+ `dependencies`, `peerDependencies`
+ ].reduce((copy, key) => (
+ (copy[key] = orig[key]) && copy || copy
+ ), {
+ main: `Arrow.js`,
+ types: `Arrow.d.ts`,
+ typings: `Arrow.d.ts`,
+ name: `@apache-arrow/${target}-${format}`
+ }), 2),
+ gulp.dest(outDir),
+ onError
+ )
+ ]);
+ }
+}
+
+function packageTask(target, format, taskName, outDir) {
+ return [`build:${taskName}`, `bundle:${taskName}`];
}
function testTask(target, format, taskName, outDir, debug) {
@@ -240,10 +245,10 @@ function typescriptTask(target, format, taskName, outDir) {
js = [js, sourcemaps.write(), gulp.dest(outDir)];
// copy types to the root
if (target === `es5` && format === `cjs`) {
- dts.push(gulp.dest(`typings`));
+ dts.push(gulp.dest(`types`));
}
tsProjects.push({
- target, format,
+ target, format,
js: js = pump(...js, onError),
dts: dts = pump(...dts, onError)
});
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/package.json
----------------------------------------------------------------------
diff --git a/js/package.json b/js/package.json
index e6848fd..03687a8 100644
--- a/js/package.json
+++ b/js/package.json
@@ -1,26 +1,27 @@
{
"name": "apache-arrow",
"version": "0.1.2",
+ "types": "./types/Arrow.d.ts",
+ "typings": "./types/Arrow.d.ts",
"main": "./targets/es5/cjs/Arrow.js",
"module": "./targets/es5/esm/Arrow.js",
"browser": "./targets/es5/umd/Arrow.js",
"jsnext:main": "./targets/es2015/esm/Arrow.js",
"esnext:main": "./targets/esnext/esm/Arrow.js",
- "typings": "./typings/Arrow.d.ts",
"description": "Apache Arrow columnar in-memory format",
"scripts": {
"lerna": "lerna",
- "commit": "git-cz",
"test": "gulp test",
"build": "gulp build",
"clean": "gulp clean",
+ "bundle": "gulp bundle",
+ "package": "gulp package",
"perf": "node ./perf/index.js",
"test:debug": "gulp test:debug",
"test:coverage": "gulp test -t esnext -m esm --coverage",
- "validate": "npm-run-all lint build test",
+ "validate": "npm-run-all clean lint build test bundle",
"lerna:publish": "lerna exec --bail=false npm publish",
"prepublishOnly": "sh ./prepublish.sh",
- "commitmsg": "validate-commit-msg",
"doc": "shx rm -rf ./doc && esdoc",
"lint": "npm-run-all -p lint:*",
"lint:src": "tslint --fix --type-check -p tsconfig.json -c tslint.json \"src/**/*.ts\"",
@@ -43,64 +44,52 @@
"files": [
"src",
"dist",
- "typings",
+ "types",
"targets",
"LICENSE",
"README.md"
],
"peerDependencies": {
- "tslib": "^1.7.1"
+ "tslib": "~1.7.1",
+ "command-line-usage": "4.0.1"
},
"dependencies": {
- "command-line-args": "~4.0.7",
- "command-line-usage": "~4.0.1",
- "flatbuffers": "~1.7.0",
- "text-encoding": "~0.6.4"
+ "flatbuffers": "1.7.0",
+ "text-encoding": "0.6.4"
},
"devDependencies": {
- "@types/flatbuffers": "~1.6.4",
- "@types/jest": "~20.0.8",
- "@types/node": "~8.0.24",
+ "@types/flatbuffers": "1.6.4",
+ "@types/jest": "20.0.8",
+ "@types/node": "^8.0.24",
"@types/text-encoding": "0.0.32",
- "benchmark": "~2.1.4",
- "commitizen": "~2.9.6",
- "conventional-changelog-cli": "~1.3.2",
- "conventional-commits-detector": "~0.1.1",
- "conventional-github-releaser": "~1.1.12",
- "conventional-recommended-bump": "~1.0.1",
- "coveralls": "~2.13.1",
- "cz-conventional-changelog": "~2.0.0",
- "del": "~3.0.0",
- "esdoc": "~1.0.1",
- "esdoc-standard-plugin": "~1.0.0",
- "google-closure-compiler": "~20170910.0.0",
+ "benchmark": "2.1.4",
+ "coveralls": "2.13.1",
+ "command-line-args": "4.0.7",
+ "del": "3.0.0",
+ "esdoc": "1.0.3",
+ "esdoc-standard-plugin": "1.0.0",
+ "google-closure-compiler": "20170910.0.0",
"gulp": "github:gulpjs/gulp#4.0",
- "gulp-json-transform": "~0.4.2",
- "gulp-sourcemaps": "~2.6.1",
- "gulp-typescript": "~3.2.2",
- "jest": "~21.1.0",
- "jest-environment-node-debug": "~2.0.0",
- "json": "~9.0.6",
- "lerna": "~2.1.2",
- "lint-staged": "~4.2.1",
- "merge2": "~1.2.0",
- "mkdirp": "~0.5.1",
- "npm-run-all": "~4.1.1",
- "pump": "~1.0.2",
- "rimraf": "~2.6.1",
- "shx": "~0.2.2",
- "text-encoding-utf-8": "~1.0.1",
- "trash": "~4.0.1",
- "ts-jest": "~21.0.1",
- "tslib": "~1.7.1",
- "tslint": "~5.7.0",
- "typescript": "~2.5.2",
- "validate-commit-msg": "~2.14.0"
- },
- "config": {
- "commitizen": {
- "path": "cz-conventional-changelog"
- }
+ "gulp-json-transform": "0.4.2",
+ "gulp-sourcemaps": "2.6.1",
+ "gulp-typescript": "3.2.2",
+ "jest": "21.1.0",
+ "jest-environment-node-debug": "2.0.0",
+ "json": "9.0.6",
+ "lerna": "2.2.0",
+ "lint-staged": "4.2.1",
+ "merge2": "1.2.0",
+ "mkdirp": "0.5.1",
+ "npm-run-all": "4.1.1",
+ "pump": "1.0.2",
+ "rimraf": "2.6.2",
+ "shx": "0.2.2",
+ "text-encoding-utf-8": "1.0.1",
+ "trash": "4.1.0",
+ "ts-jest": "21.0.1",
+ "tslib": "1.7.1",
+ "tslint": "5.7.0",
+ "typescript": "2.5.2"
},
"lint-staged": {
"*.@(ts)": [
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/prepublish.sh
----------------------------------------------------------------------
diff --git a/js/prepublish.sh b/js/prepublish.sh
index 4ad8db1..b40504a 100644
--- a/js/prepublish.sh
+++ b/js/prepublish.sh
@@ -17,10 +17,10 @@
# specific language governing permissions and limitations
# under the License.
+npm run clean
npm run lint
npm run build
npm run test
-preset=`conventional-commits-detector` && echo $preset
-bump=`conventional-recommended-bump -p $preset` && echo $bump
-npm --no-git-tag-version version $bump &>/dev/null
+npm --no-git-tag-version version patch &>/dev/null
+npm run bundle
npm run lerna:publish
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/src/reader/dictionary.ts
----------------------------------------------------------------------
diff --git a/js/src/reader/dictionary.ts b/js/src/reader/dictionary.ts
index aef2bc9..abf7ac3 100644
--- a/js/src/reader/dictionary.ts
+++ b/js/src/reader/dictionary.ts
@@ -19,21 +19,18 @@ import { readVector } from './vector';
import { MessageBatch } from './message';
import * as Schema_ from '../format/Schema_generated';
import { IteratorState, Dictionaries } from './arrow';
-
import Field = Schema_.org.apache.arrow.flatbuf.Field;
-import DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding;
-export function* readDictionaries(field: Field,
+export function* readDictionaries(field: Field | null,
batch: MessageBatch,
iterator: IteratorState,
dictionaries: Dictionaries) {
- let id: string, encoding: DictionaryEncoding;
- if ((encoding = field.dictionary()) &&
- batch.id === (id = encoding.id().toFloat64().toString())) {
+ let id: string, encoding = field && field.dictionary();
+ if (encoding && batch.id === (id = encoding.id().toFloat64().toString())) {
yield [id, readVector(field, batch, iterator, null)];
return;
}
- for (let i = -1, n = field.childrenLength(); ++i < n;) {
+ for (let i = -1, n = field && field.childrenLength() || 0; ++i < n;) {
// Since a dictionary batch can only contain a single vector, return early after we find it
for (let result of readDictionaries(field.children(i), batch, iterator, dictionaries)) {
yield result;
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/src/reader/vector.ts
----------------------------------------------------------------------
diff --git a/js/src/reader/vector.ts b/js/src/reader/vector.ts
index a3cd798..3b6663b 100644
--- a/js/src/reader/vector.ts
+++ b/js/src/reader/vector.ts
@@ -54,12 +54,13 @@ function readTypedVector(field: Field, batch: MessageBatch, iterator: IteratorSt
}
function readDictionaryVector(field: Field, batch: MessageBatch, iterator: IteratorState, dictionaries: Dictionaries) {
- let encoding: DictionaryEncoding;
+ let encoding: DictionaryEncoding | null;
if (dictionaries && (encoding = field.dictionary())) {
let id = encoding.id().toFloat64().toString();
let fieldType = encoding.indexType() ||
/* a dictionary index defaults to signed 32 bit int if unspecified */
{ bitWidth: () => 32, isSigned: () => true };
+ // workaround for https://issues.apache.org/jira/browse/ARROW-1363
let indexField = createSyntheticDictionaryIndexField(field, fieldType);
let index = readIntVector(indexField, batch, iterator, null, fieldType);
return DictionaryVector.create(field, index.length, index, dictionaries[id]);
@@ -105,12 +106,16 @@ function createIntVector(field, length, data, validity, offsets, fieldType, batc
let type = fieldType || field.type(new Int()), bitWidth = type.bitWidth();
let Vector = valueForBitWidth(bitWidth, intVectors)[+type.isSigned()];
return Vector.create(field, length, validity, data || offsets);
- // ---------------------- so this is kinda strange 👆:
- // The dictionary encoded vectors I generated from sample mapd-core queries have the indicies' data buffers
- // tagged as VectorType.OFFSET (0) in the field metadata. The current TS impl ignores buffers' layout type,
- // and assumes the second buffer is the data for a NullableIntVector. Since we've been stricter about enforcing
- // the Arrow spec while parsing, the IntVector's data buffer reads empty in this case. If so, fallback to using
- // the offsets buffer as the data, since IntVectors don't have offsets.
+ // ----------------------------------------------- 👆:
+ // Workaround for https://issues.apache.org/jira/browse/ARROW-1363
+ // This bug causes dictionary encoded vector indicies' IntVector data
+ // buffers to be tagged as VectorType.OFFSET (0) in the field metadata
+ // instead of VectorType.DATA. The `readVectorLayout` routine strictly
+ // obeys the types in the field metadata, so if we're parsing an Arrow
+ // file written by a version of the library published before ARROW-1363
+ // was fixed, the IntVector's data buffer will be null, and the offset
+ // buffer will be the actual data. If data is null, it's safe to assume
+ // the offset buffer is the data, because IntVectors don't have offsets.
}
const readFloatVector = readVectorLayout<number, FloatArray>(
http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/src/table.ts
----------------------------------------------------------------------
diff --git a/js/src/table.ts b/js/src/table.ts
index 999bb24..5e78105 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -19,6 +19,8 @@ import { readBuffers } from './reader/arrow';
import { StructVector } from './vector/struct';
import { Vector, sliceToRangeArgs } from './vector/vector';
+export type RowObject = { [k: string]: any };
+
export class Table implements Iterable<Map<string, any>> {
public length: number;
protected _columns: Vector<any>[];
@@ -66,22 +68,30 @@ export class Table implements Iterable<Map<string, any>> {
yield column;
}
}
+ getRow(rowIndex: number): RowObject;
+ getRow(rowIndex: number, compact: boolean): Array<any>;
getRow(rowIndex: number, compact?: boolean) {
return (compact && rowAsArray || rowAsObject)(rowIndex, this._columns);
}
- getCell(columnName: string, rowIndex: number) {
- return this.getColumn(columnName).get(rowIndex);
+ getCell<T extends any>(columnName: string, rowIndex: number) {
+ return this.getColumn<Vector<T>>(columnName).get(rowIndex);
}
- getCellAt(columnIndex: number, rowIndex: number) {
- return this.getColumnAt(columnIndex).get(rowIndex);
+ getCellAt<T extends any>(columnIndex: number, rowIndex: number) {
+ return this.getColumnAt<Vector<T>>(columnIndex).get(rowIndex);
}
- getColumn<T = any>(columnName: string) {
- return this._columnsMap[columnName] as Vector<T>;
+ getColumn<T extends Vector<any>>(columnName: string) {
+ return this._columnsMap[columnName] as T;
}
- getColumnAt<T = any>(columnIndex: number) {
- return this._columns[columnIndex] as Vector<T>;
+ getColumnAt<T extends Vector<any>>(columnIndex: number) {
+ return this._columns[columnIndex] as T;
}
- toString({ index = false } = {}) {
+ toString(): string;
+ toString(index: boolean): string;
+ toString(options: { index: boolean }): string;
+ toString(options?: any) {
+ const index = typeof options === 'object' ? options && !!options.index
+ : typeof options === 'boolean' ? !!options
+ : false;
const { length } = this;
if (length <= 0) { return ''; }
const maxColumnWidths = [];