You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/22 01:28:13 UTC

arrow git commit: ARROW-1590: [JS] Flow TS Table method generics

Repository: arrow
Updated Branches:
  refs/heads/master 8fd73b48c -> 8996a4ff3


ARROW-1590: [JS] Flow TS Table method generics

This PR fixes the Table generics to infer the types from the call site:

![kapture 2017-09-21 at 4 03 34](https://user-images.githubusercontent.com/178183/30692953-5b8638d6-9e82-11e7-9d66-b87eb50f0e3f.gif)

@wesm this PR also includes the fixes to the prepublish script I mentioned yesterday.

Author: Paul Taylor <pa...@me.com>

Closes #1120 from trxcllnt/fix-ts-typings and squashes the following commits:

73d8eee [Paul Taylor] make package the default gulp task
1d269fe [Paul Taylor] flow table method generics
dd1e819 [Paul Taylor] more defensively typed reader internal values
ac6a778 [Paul Taylor] add comments explaining ARROW-1363 reader workaround
e37f885 [Paul Taylor] fix gulp and prepublish scripts
58fa201 [Paul Taylor] enforce exact dependency package versions


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8996a4ff
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8996a4ff
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8996a4ff

Branch: refs/heads/master
Commit: 8996a4ff3ca006983fb19f07d3530180e65a93d1
Parents: 8fd73b4
Author: Paul Taylor <pa...@me.com>
Authored: Thu Sep 21 21:28:07 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Thu Sep 21 21:28:07 2017 -0400

----------------------------------------------------------------------
 js/.gitignore               |  3 +-
 js/.npmrc                   |  1 +
 js/gulpfile.js              | 63 +++++++++++++++-------------
 js/package.json             | 91 ++++++++++++++++++----------------------
 js/prepublish.sh            |  6 +--
 js/src/reader/dictionary.ts | 11 ++---
 js/src/reader/vector.ts     | 19 +++++----
 js/src/table.ts             | 28 +++++++++----
 8 files changed, 115 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/.gitignore
----------------------------------------------------------------------
diff --git a/js/.gitignore b/js/.gitignore
index b48f35b..6d0f88d 100644
--- a/js/.gitignore
+++ b/js/.gitignore
@@ -57,7 +57,8 @@ build/Release
 node_modules/
 jspm_packages/
 
-# Typescript v1 declaration files
+# Typescript declaration files
+types/
 typings/
 
 # Optional npm cache directory

http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/.npmrc
----------------------------------------------------------------------
diff --git a/js/.npmrc b/js/.npmrc
index 43c97e7..71ffabd 100644
--- a/js/.npmrc
+++ b/js/.npmrc
@@ -1 +1,2 @@
+save-prefix=
 package-lock=false

http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/gulpfile.js
----------------------------------------------------------------------
diff --git a/js/gulpfile.js b/js/gulpfile.js
index 9c945aa..9f8e564 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -63,14 +63,17 @@ for (const [target, format] of combinations([`all`, `all`])) {
     gulp.task(`clean:${combo}`, gulp.series(cleanTask(target, format, combo, `targets/${target}/${format}`)));
     gulp.task(`build:${combo}`, gulp.series(buildTask(target, format, combo, `targets/${target}/${format}`)));
     gulp.task(`bundle:${combo}`, gulp.series(bundleTask(target, format, combo, `targets/${target}/${format}`)));
+    gulp.task(`package:${combo}`, gulp.series(packageTask(target, format, combo, `targets/${target}/${format}`)));
     gulp.task(`test:debug:${combo}`, gulp.series(testTask(target, format, combo, `targets/${target}/${format}`, true)));
 }
 
 gulp.task(`test`, gulp.series(runTaskCombos(`test`)));
 gulp.task(`clean`, gulp.parallel(runTaskCombos(`clean`)));
-gulp.task(`build`, gulp.parallel(runTaskCombos(`bundle`)));
+gulp.task(`build`, gulp.parallel(runTaskCombos(`build`)));
+gulp.task(`bundle`, gulp.parallel(runTaskCombos(`bundle`)));
+gulp.task(`package`, gulp.parallel(runTaskCombos(`package`)));
 gulp.task(`test:debug`, gulp.series(runTaskCombos(`test:debug`)));
-gulp.task(`default`, gulp.task(`build`));
+gulp.task(`default`, gulp.task(`package`));
 
 function runTaskCombos(name) {
     const combos = [];
@@ -87,7 +90,7 @@ function cleanTask(target, format, taskName, outDir) {
     return function cleanTask() {
         const globs = [`${outDir}/**`];
         if (target === `es5` && format === `cjs`) {
-            globs.push(`typings`);
+            globs.push(`types`, `typings`);
         }
         return del(globs);
     };
@@ -100,30 +103,32 @@ function buildTask(target, format, taskName, outDir) {
 }
 
 function bundleTask(target, format, taskName, outDir) {
-    return [
-        [`build:${taskName}`],
-        function bundleTask() {
-            return streamMerge([
-                pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError),
-                pump(
-                    gulp.src(`package.json`),
-                    gulpJsonTransform((orig) => [
-                        `version`, `description`,
-                        `author`, `homepage`, `bugs`, `license`,
-                        `keywords`, `repository`, `peerDependencies`
-                    ].reduce((copy, key) => (
-                        (copy[key] = orig[key]) && copy || copy
-                    ), {
-                        main: `Arrow.js`,
-                        typings: `Arrow.d.ts`,
-                        name: `@apache-arrow/${target}-${format}`
-                    }), 2),
-                    gulp.dest(outDir),
-                    onError
-                )
-            ])
-        }
-    ];
+    return function bundleTask() {
+        return streamMerge([
+            pump(gulp.src([`LICENSE`, `README.md`]), gulp.dest(outDir), onError),
+            pump(
+                gulp.src(`package.json`),
+                gulpJsonTransform((orig) => [
+                    `version`, `description`, `keywords`,
+                    `repository`, `author`, `homepage`, `bugs`, `license`,
+                    `dependencies`, `peerDependencies`
+                ].reduce((copy, key) => (
+                    (copy[key] = orig[key]) && copy || copy
+                ), {
+                    main: `Arrow.js`,
+                    types: `Arrow.d.ts`,
+                    typings: `Arrow.d.ts`,
+                    name: `@apache-arrow/${target}-${format}`
+                }), 2),
+                gulp.dest(outDir),
+                onError
+            )
+        ]);
+    }
+}
+
+function packageTask(target, format, taskName, outDir) {
+    return [`build:${taskName}`, `bundle:${taskName}`];
 }
 
 function testTask(target, format, taskName, outDir, debug) {
@@ -240,10 +245,10 @@ function typescriptTask(target, format, taskName, outDir) {
                 js = [js, sourcemaps.write(), gulp.dest(outDir)];
                 // copy types to the root
                 if (target === `es5` && format === `cjs`) {
-                    dts.push(gulp.dest(`typings`));
+                    dts.push(gulp.dest(`types`));
                 }
                 tsProjects.push({
-                    target, format, 
+                    target, format,
                     js: js = pump(...js, onError),
                     dts: dts = pump(...dts, onError)
                 });

http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/package.json
----------------------------------------------------------------------
diff --git a/js/package.json b/js/package.json
index e6848fd..03687a8 100644
--- a/js/package.json
+++ b/js/package.json
@@ -1,26 +1,27 @@
 {
   "name": "apache-arrow",
   "version": "0.1.2",
+  "types": "./types/Arrow.d.ts",
+  "typings": "./types/Arrow.d.ts",
   "main": "./targets/es5/cjs/Arrow.js",
   "module": "./targets/es5/esm/Arrow.js",
   "browser": "./targets/es5/umd/Arrow.js",
   "jsnext:main": "./targets/es2015/esm/Arrow.js",
   "esnext:main": "./targets/esnext/esm/Arrow.js",
-  "typings": "./typings/Arrow.d.ts",
   "description": "Apache Arrow columnar in-memory format",
   "scripts": {
     "lerna": "lerna",
-    "commit": "git-cz",
     "test": "gulp test",
     "build": "gulp build",
     "clean": "gulp clean",
+    "bundle": "gulp bundle",
+    "package": "gulp package",
     "perf": "node ./perf/index.js",
     "test:debug": "gulp test:debug",
     "test:coverage": "gulp test -t esnext -m esm --coverage",
-    "validate": "npm-run-all lint build test",
+    "validate": "npm-run-all clean lint build test bundle",
     "lerna:publish": "lerna exec --bail=false npm publish",
     "prepublishOnly": "sh ./prepublish.sh",
-    "commitmsg": "validate-commit-msg",
     "doc": "shx rm -rf ./doc && esdoc",
     "lint": "npm-run-all -p lint:*",
     "lint:src": "tslint --fix --type-check -p tsconfig.json -c tslint.json \"src/**/*.ts\"",
@@ -43,64 +44,52 @@
   "files": [
     "src",
     "dist",
-    "typings",
+    "types",
     "targets",
     "LICENSE",
     "README.md"
   ],
   "peerDependencies": {
-    "tslib": "^1.7.1"
+    "tslib": "~1.7.1",
+    "command-line-usage": "4.0.1"
   },
   "dependencies": {
-    "command-line-args": "~4.0.7",
-    "command-line-usage": "~4.0.1",
-    "flatbuffers": "~1.7.0",
-    "text-encoding": "~0.6.4"
+    "flatbuffers": "1.7.0",
+    "text-encoding": "0.6.4"
   },
   "devDependencies": {
-    "@types/flatbuffers": "~1.6.4",
-    "@types/jest": "~20.0.8",
-    "@types/node": "~8.0.24",
+    "@types/flatbuffers": "1.6.4",
+    "@types/jest": "20.0.8",
+    "@types/node": "^8.0.24",
     "@types/text-encoding": "0.0.32",
-    "benchmark": "~2.1.4",
-    "commitizen": "~2.9.6",
-    "conventional-changelog-cli": "~1.3.2",
-    "conventional-commits-detector": "~0.1.1",
-    "conventional-github-releaser": "~1.1.12",
-    "conventional-recommended-bump": "~1.0.1",
-    "coveralls": "~2.13.1",
-    "cz-conventional-changelog": "~2.0.0",
-    "del": "~3.0.0",
-    "esdoc": "~1.0.1",
-    "esdoc-standard-plugin": "~1.0.0",
-    "google-closure-compiler": "~20170910.0.0",
+    "benchmark": "2.1.4",
+    "coveralls": "2.13.1",
+    "command-line-args": "4.0.7",
+    "del": "3.0.0",
+    "esdoc": "1.0.3",
+    "esdoc-standard-plugin": "1.0.0",
+    "google-closure-compiler": "20170910.0.0",
     "gulp": "github:gulpjs/gulp#4.0",
-    "gulp-json-transform": "~0.4.2",
-    "gulp-sourcemaps": "~2.6.1",
-    "gulp-typescript": "~3.2.2",
-    "jest": "~21.1.0",
-    "jest-environment-node-debug": "~2.0.0",
-    "json": "~9.0.6",
-    "lerna": "~2.1.2",
-    "lint-staged": "~4.2.1",
-    "merge2": "~1.2.0",
-    "mkdirp": "~0.5.1",
-    "npm-run-all": "~4.1.1",
-    "pump": "~1.0.2",
-    "rimraf": "~2.6.1",
-    "shx": "~0.2.2",
-    "text-encoding-utf-8": "~1.0.1",
-    "trash": "~4.0.1",
-    "ts-jest": "~21.0.1",
-    "tslib": "~1.7.1",
-    "tslint": "~5.7.0",
-    "typescript": "~2.5.2",
-    "validate-commit-msg": "~2.14.0"
-  },
-  "config": {
-    "commitizen": {
-      "path": "cz-conventional-changelog"
-    }
+    "gulp-json-transform": "0.4.2",
+    "gulp-sourcemaps": "2.6.1",
+    "gulp-typescript": "3.2.2",
+    "jest": "21.1.0",
+    "jest-environment-node-debug": "2.0.0",
+    "json": "9.0.6",
+    "lerna": "2.2.0",
+    "lint-staged": "4.2.1",
+    "merge2": "1.2.0",
+    "mkdirp": "0.5.1",
+    "npm-run-all": "4.1.1",
+    "pump": "1.0.2",
+    "rimraf": "2.6.2",
+    "shx": "0.2.2",
+    "text-encoding-utf-8": "1.0.1",
+    "trash": "4.1.0",
+    "ts-jest": "21.0.1",
+    "tslib": "1.7.1",
+    "tslint": "5.7.0",
+    "typescript": "2.5.2"
   },
   "lint-staged": {
     "*.@(ts)": [

http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/prepublish.sh
----------------------------------------------------------------------
diff --git a/js/prepublish.sh b/js/prepublish.sh
index 4ad8db1..b40504a 100644
--- a/js/prepublish.sh
+++ b/js/prepublish.sh
@@ -17,10 +17,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
+npm run clean
 npm run lint
 npm run build
 npm run test
-preset=`conventional-commits-detector` && echo $preset
-bump=`conventional-recommended-bump -p $preset` && echo $bump
-npm --no-git-tag-version version $bump &>/dev/null
+npm --no-git-tag-version version patch &>/dev/null
+npm run bundle
 npm run lerna:publish
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/src/reader/dictionary.ts
----------------------------------------------------------------------
diff --git a/js/src/reader/dictionary.ts b/js/src/reader/dictionary.ts
index aef2bc9..abf7ac3 100644
--- a/js/src/reader/dictionary.ts
+++ b/js/src/reader/dictionary.ts
@@ -19,21 +19,18 @@ import { readVector } from './vector';
 import { MessageBatch } from './message';
 import * as Schema_ from '../format/Schema_generated';
 import { IteratorState, Dictionaries } from './arrow';
-
 import Field = Schema_.org.apache.arrow.flatbuf.Field;
-import DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding;
 
-export function* readDictionaries(field: Field,
+export function* readDictionaries(field: Field | null,
                                   batch: MessageBatch,
                                   iterator: IteratorState,
                                   dictionaries: Dictionaries) {
-    let id: string, encoding: DictionaryEncoding;
-    if ((encoding = field.dictionary()) &&
-        batch.id === (id = encoding.id().toFloat64().toString())) {
+    let id: string, encoding = field && field.dictionary();
+    if (encoding && batch.id === (id = encoding.id().toFloat64().toString())) {
         yield [id, readVector(field, batch, iterator, null)];
         return;
     }
-    for (let i = -1, n = field.childrenLength(); ++i < n;) {
+    for (let i = -1, n = field && field.childrenLength() || 0; ++i < n;) {
         // Since a dictionary batch can only contain a single vector, return early after we find it
         for (let result of readDictionaries(field.children(i), batch, iterator, dictionaries)) {
             yield result;

http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/src/reader/vector.ts
----------------------------------------------------------------------
diff --git a/js/src/reader/vector.ts b/js/src/reader/vector.ts
index a3cd798..3b6663b 100644
--- a/js/src/reader/vector.ts
+++ b/js/src/reader/vector.ts
@@ -54,12 +54,13 @@ function readTypedVector(field: Field, batch: MessageBatch, iterator: IteratorSt
 }
 
 function readDictionaryVector(field: Field, batch: MessageBatch, iterator: IteratorState, dictionaries: Dictionaries) {
-    let encoding: DictionaryEncoding;
+    let encoding: DictionaryEncoding | null;
     if (dictionaries && (encoding = field.dictionary())) {
         let id = encoding.id().toFloat64().toString();
         let fieldType =  encoding.indexType() ||
             /* a dictionary index defaults to signed 32 bit int if unspecified */
             { bitWidth: () => 32, isSigned: () => true };
+        // workaround for https://issues.apache.org/jira/browse/ARROW-1363
         let indexField = createSyntheticDictionaryIndexField(field, fieldType);
         let index = readIntVector(indexField, batch, iterator, null, fieldType);
         return DictionaryVector.create(field, index.length, index, dictionaries[id]);
@@ -105,12 +106,16 @@ function createIntVector(field, length, data, validity, offsets, fieldType, batc
     let type = fieldType || field.type(new Int()), bitWidth = type.bitWidth();
     let Vector = valueForBitWidth(bitWidth, intVectors)[+type.isSigned()];
     return Vector.create(field, length, validity, data || offsets);
-    // ---------------------- so this is kinda strange 👆:
-    // The dictionary encoded vectors I generated from sample mapd-core queries have the indicies' data buffers
-    // tagged as VectorType.OFFSET (0) in the field metadata. The current TS impl ignores buffers' layout type,
-    // and assumes the second buffer is the data for a NullableIntVector. Since we've been stricter about enforcing
-    // the Arrow spec while parsing, the IntVector's data buffer reads empty in this case. If so, fallback to using
-    // the offsets buffer as the data, since IntVectors don't have offsets.
+    // ----------------------------------------------- 👆:
+    // Workaround for https://issues.apache.org/jira/browse/ARROW-1363
+    // This bug causes dictionary encoded vector indicies' IntVector data
+    // buffers to be tagged as VectorType.OFFSET (0) in the field metadata
+    // instead of VectorType.DATA. The `readVectorLayout` routine strictly
+    // obeys the types in the field metadata, so if we're parsing an Arrow
+    // file written by a version of the library published before ARROW-1363
+    // was fixed, the IntVector's data buffer will be null, and the offset
+    // buffer will be the actual data. If data is null, it's safe to assume
+    // the offset buffer is the data, because IntVectors don't have offsets.
 }
 
 const readFloatVector = readVectorLayout<number, FloatArray>(

http://git-wip-us.apache.org/repos/asf/arrow/blob/8996a4ff/js/src/table.ts
----------------------------------------------------------------------
diff --git a/js/src/table.ts b/js/src/table.ts
index 999bb24..5e78105 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -19,6 +19,8 @@ import { readBuffers } from './reader/arrow';
 import { StructVector } from './vector/struct';
 import { Vector, sliceToRangeArgs } from './vector/vector';
 
+export type RowObject = { [k: string]: any };
+
 export class Table implements Iterable<Map<string, any>> {
     public length: number;
     protected _columns: Vector<any>[];
@@ -66,22 +68,30 @@ export class Table implements Iterable<Map<string, any>> {
             yield column;
         }
     }
+    getRow(rowIndex: number): RowObject;
+    getRow(rowIndex: number, compact: boolean): Array<any>;
     getRow(rowIndex: number, compact?: boolean) {
         return (compact && rowAsArray || rowAsObject)(rowIndex, this._columns);
     }
-    getCell(columnName: string, rowIndex: number) {
-        return this.getColumn(columnName).get(rowIndex);
+    getCell<T extends any>(columnName: string, rowIndex: number) {
+        return this.getColumn<Vector<T>>(columnName).get(rowIndex);
     }
-    getCellAt(columnIndex: number, rowIndex: number) {
-        return this.getColumnAt(columnIndex).get(rowIndex);
+    getCellAt<T extends any>(columnIndex: number, rowIndex: number) {
+        return this.getColumnAt<Vector<T>>(columnIndex).get(rowIndex);
     }
-    getColumn<T = any>(columnName: string) {
-        return this._columnsMap[columnName] as Vector<T>;
+    getColumn<T extends Vector<any>>(columnName: string) {
+        return this._columnsMap[columnName] as T;
     }
-    getColumnAt<T = any>(columnIndex: number) {
-        return this._columns[columnIndex] as Vector<T>;
+    getColumnAt<T extends Vector<any>>(columnIndex: number) {
+        return this._columns[columnIndex] as T;
     }
-    toString({ index = false } = {}) {
+    toString(): string;
+    toString(index: boolean): string;
+    toString(options: { index: boolean }): string;
+    toString(options?: any) {
+        const index = typeof options === 'object' ? options && !!options.index
+                    : typeof options === 'boolean' ? !!options
+                    : false;
         const { length } = this;
         if (length <= 0) { return ''; }
         const maxColumnWidths = [];