You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bh...@apache.org on 2018/09/28 01:45:00 UTC

[arrow] branch master updated: ARROW-3073, 3074: [JS] Add DateVector.from and fix DateVector.indexOf

This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3a503ef  ARROW-3073,3074: [JS] Add DateVector.from and fix DateVector.indexOf
3a503ef is described below

commit 3a503ef64fa8191b5b9642e9584f9ee06ae96ca9
Author: Brian Hulette <hu...@gmail.com>
AuthorDate: Thu Sep 27 18:44:37 2018 -0700

    ARROW-3073,3074: [JS] Add DateVector.from and fix DateVector.indexOf
    
    - Add a `DateVector.from` static method for creating a `DateVector` from a JS `Date[]`. Relies on `Arrow.util.Int64.fromNumber`. ([ARROW-3073](https://issues.apache.org/jira/browse/ARROW-3073))
    - Add a specialized implementation of `indexOf` for `DateVector` since the base implementation (in `FixedSizeView`) throws an error. ([ARROW-3074](https://issues.apache.org/jira/browse/ARROW-3074))
    
    Author: Brian Hulette <hu...@gmail.com>
    
    Closes #2445 from TheNeuralBit/datevector-from and squashes the following commits:
    
    177a0c89 <Brian Hulette> Convert search value once in DateVector, add support for DateUnit.DAY in DateVector.from
    94772494 <Brian Hulette> Add more unit tests
    7f1e4e68 <Brian Hulette> fix integration
    d0c06f0b <Brian Hulette> Fix DateVector.indexOf
    67c977b5 <Brian Hulette> Add DateVector.from
---
 js/src/Arrow.externs.js      | 16 ++++++++
 js/src/Arrow.ts              | 11 +++++
 js/src/ipc/reader/json.ts    | 30 ++------------
 js/src/util/int.ts           | 96 +++++++++++++++++++++++++++++++++++++++++++-
 js/src/vector.ts             | 17 ++++++++
 js/test/unit/int-tests.ts    | 17 ++++++++
 js/test/unit/vector-tests.ts | 32 ++++++++++++++-
 7 files changed, 191 insertions(+), 28 deletions(-)

diff --git a/js/src/Arrow.externs.js b/js/src/Arrow.externs.js
index 8f284d2..f01ea5c 100644
--- a/js/src/Arrow.externs.js
+++ b/js/src/Arrow.externs.js
@@ -195,6 +195,12 @@ Uint64.add = function() {};
 /** @type {?} */
 Uint64.multiply = function() {};
 /** @type {?} */
+Uint64.from = function() {};
+/** @type {?} */
+Uint64.fromNumber = function() {};
+/** @type {?} */
+Uint64.fromString = function() {};
+/** @type {?} */
 Uint64.prototype.times;
 /** @type {?} */
 Uint64.prototype.plus
@@ -205,6 +211,10 @@ Int64.add = function() {};
 /** @type {?} */
 Int64.multiply = function() {};
 /** @type {?} */
+Int64.from = function() {};
+/** @type {?} */
+Int64.fromNumber = function() {};
+/** @type {?} */
 Int64.fromString = function() {};
 /** @type {?} */
 Int64.prototype.negate
@@ -221,6 +231,10 @@ Int128.add = function() {};
 /** @type {?} */
 Int128.multiply = function() {};
 /** @type {?} */
+Int128.from = function() {};
+/** @type {?} */
+Int128.fromNumber = function() {};
+/** @type {?} */
 Int128.fromString = function() {};
 /** @type {?} */
 Int128.prototype.negate
@@ -539,6 +553,8 @@ FloatVector.from = function() {};
 
 var DateVector = function() {};
 /** @type {?} */
+DateVector.from = function() {};
+/** @type {?} */
 DateVector.prototype.asEpochMilliseconds;
 var DecimalVector = function() {};
 var TimeVector = function() {};
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index 61556c4..b1f4a3a 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -246,14 +246,24 @@ RecordBatch['from'] = RecordBatch.from;
 
 util_int_.Uint64['add'] = util_int_.Uint64.add;
 util_int_.Uint64['multiply'] = util_int_.Uint64.multiply;
+util_int_.Uint64['from'] = util_int_.Uint64.from;
+util_int_.Uint64['fromNumber'] = util_int_.Uint64.fromNumber;
+util_int_.Uint64['fromString'] = util_int_.Uint64.fromString;
+util_int_.Uint64['convertArray'] = util_int_.Uint64.convertArray;
 
 util_int_.Int64['add'] = util_int_.Int64.add;
 util_int_.Int64['multiply'] = util_int_.Int64.multiply;
+util_int_.Int64['from'] = util_int_.Int64.from;
+util_int_.Int64['fromNumber'] = util_int_.Int64.fromNumber;
 util_int_.Int64['fromString'] = util_int_.Int64.fromString;
+util_int_.Int64['convertArray'] = util_int_.Int64.convertArray;
 
 util_int_.Int128['add'] = util_int_.Int128.add;
 util_int_.Int128['multiply'] = util_int_.Int128.multiply;
+util_int_.Int128['from'] = util_int_.Int128.from;
+util_int_.Int128['fromNumber'] = util_int_.Int128.fromNumber;
 util_int_.Int128['fromString'] = util_int_.Int128.fromString;
+util_int_.Int128['convertArray'] = util_int_.Int128.convertArray;
 
 data_.ChunkedData['computeOffsets'] = data_.ChunkedData.computeOffsets;
 
@@ -301,6 +311,7 @@ type_.DataType['isMap'] = type_.DataType.isMap;
 type_.DataType['isDictionary'] = type_.DataType.isDictionary;
 
 vector_.BoolVector['from'] = vector_.BoolVector.from;
+vector_.DateVector['from'] = vector_.DateVector.from;
 vector_.IntVector['from'] = vector_.IntVector.from;
 vector_.FloatVector['from'] = vector_.FloatVector.from;
 
diff --git a/js/src/ipc/reader/json.ts b/js/src/ipc/reader/json.ts
index e8ab498..0f0c018 100644
--- a/js/src/ipc/reader/json.ts
+++ b/js/src/ipc/reader/json.ts
@@ -100,13 +100,13 @@ export class JSONDataLoader extends TypeDataLoader {
     protected readData<T extends DataType>(type: T, { offset }: BufferMetadata = this.getBufferMetadata()) {
         const { sources } = this;
         if (DataType.isTimestamp(type) === true) {
-            return new Uint8Array(int64DataFromJSON(sources[offset] as string[]));
+            return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer);
         } else if ((DataType.isInt(type) || DataType.isTime(type)) && type.bitWidth === 64) {
-            return new Uint8Array(int64DataFromJSON(sources[offset] as string[]));
+            return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer);
         } else if (DataType.isDate(type) && type.unit === DateUnit.MILLISECOND) {
-            return new Uint8Array(int64DataFromJSON(sources[offset] as string[]));
+            return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer);
         } else if (DataType.isDecimal(type) === true) {
-            return new Uint8Array(decimalDataFromJSON(sources[offset] as string[]));
+            return new Uint8Array(IntUtil.Int128.convertArray(sources[offset] as string[]).buffer);
         } else if (DataType.isBinary(type) === true || DataType.isFixedSizeBinary(type) === true) {
             return new Uint8Array(binaryDataFromJSON(sources[offset] as string[]));
         } else if (DataType.isBool(type) === true) {
@@ -119,28 +119,6 @@ export class JSONDataLoader extends TypeDataLoader {
     }
 }
 
-function int64DataFromJSON(values: string[]) {
-    const data = new Uint32Array(values.length * 2);
-    for (let i = -1, n = values.length; ++i < n;) {
-        // Force all values (even numbers) to be parsed as strings since
-        // pulling out high and low bits seems to lose precision sometimes
-        // For example:
-        //     > -4613034156400212000 >>> 0
-        //     721782784
-        // The correct lower 32-bits are 721782752
-        IntUtil.Int64.fromString(values[i].toString(), new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2));
-    }
-    return data.buffer;
-}
-
-function decimalDataFromJSON(values: string[]) {
-    const data = new Uint32Array(values.length * 4);
-    for (let i = -1, n = values.length; ++i < n;) {
-        IntUtil.Int128.fromString(values[i], new Uint32Array(data.buffer, data.byteOffset + 4 * 4 * i, 4));
-    }
-    return data.buffer;
-}
-
 function binaryDataFromJSON(values: string[]) {
     // "DATA": ["49BC7D5B6C47D2","3F5FB6D9322026"]
     // There are definitely more efficient ways to do this... but it gets the
diff --git a/js/src/util/int.ts b/js/src/util/int.ts
index 9088e7b..5b9497f 100644
--- a/js/src/util/int.ts
+++ b/js/src/util/int.ts
@@ -118,6 +118,50 @@ export class Uint64 extends BaseInt64 {
         return this;
     }
 
+    static from(val: any, out_buffer = new Uint32Array(2)): Uint64 {
+        return Uint64.fromString(
+            typeof(val) === 'string' ? val : val.toString(),
+            out_buffer
+        );
+    }
+
+    static fromNumber(num: number, out_buffer = new Uint32Array(2)): Uint64 {
+        // Always parse numbers as strings - pulling out high and low bits
+        // directly seems to lose precision sometimes
+        // For example:
+        //     > -4613034156400212000 >>> 0
+        //     721782784
+        // The correct lower 32-bits are 721782752
+        return Uint64.fromString(num.toString(), out_buffer);
+    }
+
+    static fromString(str: string, out_buffer = new Uint32Array(2)): Uint64 {
+        const length = str.length;
+
+        let out = new Uint64(out_buffer);
+        for (let posn = 0; posn < length;) {
+            const group = kInt32DecimalDigits < length - posn ?
+                          kInt32DecimalDigits : length - posn;
+            const chunk = new Uint64(new Uint32Array([parseInt(str.substr(posn, group), 10), 0]));
+            const multiple = new Uint64(new Uint32Array([kPowersOfTen[group], 0]));
+
+            out.times(multiple);
+            out.plus(chunk);
+
+            posn += group;
+        }
+
+        return out;
+    }
+
+    static convertArray(values: (string|number)[]): Uint32Array {
+        const data = new Uint32Array(values.length * 2);
+        for (let i = -1, n = values.length; ++i < n;) {
+            Uint64.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2));
+        }
+        return data;
+    }
+
     static multiply(left: Uint64, right: Uint64): Uint64 {
         let rtrn = new Uint64(new Uint32Array(left.buffer));
         return rtrn.times(right);
@@ -156,6 +200,23 @@ export class Int64 extends BaseInt64 {
             (this_high === other_high && this.buffer[0] < other.buffer[0]);
     }
 
+    static from(val: any, out_buffer = new Uint32Array(2)): Int64 {
+        return Int64.fromString(
+            typeof(val) === 'string' ? val : val.toString(),
+            out_buffer
+        );
+    }
+
+    static fromNumber(num: number, out_buffer = new Uint32Array(2)): Int64 {
+        // Always parse numbers as strings - pulling out high and low bits
+        // directly seems to lose precision sometimes
+        // For example:
+        //     > -4613034156400212000 >>> 0
+        //     721782784
+        // The correct lower 32-bits are 721782752
+        return Int64.fromString(num.toString(), out_buffer);
+    }
+
     static fromString(str: string, out_buffer = new Uint32Array(2)): Int64 {
         // TODO: Assert that out_buffer is 0 and length = 2
         const negate = str.startsWith('-');
@@ -173,10 +234,17 @@ export class Int64 extends BaseInt64 {
 
             posn += group;
         }
-
         return negate ? out.negate() : out;
     }
 
+    static convertArray(values: (string|number)[]): Uint32Array {
+        const data = new Uint32Array(values.length * 2);
+        for (let i = -1, n = values.length; ++i < n;) {
+            Int64.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2));
+        }
+        return data;
+    }
+
     static multiply(left: Int64, right: Int64): Int64 {
         let rtrn = new Int64(new Uint32Array(left.buffer));
         return rtrn.times(right);
@@ -297,6 +365,23 @@ export class Int128 {
         return rtrn.plus(right);
     }
 
+    static from(val: any, out_buffer = new Uint32Array(4)): Int128 {
+        return Int128.fromString(
+            typeof(val) === 'string' ? val : val.toString(),
+            out_buffer
+        );
+    }
+
+    static fromNumber(num: number, out_buffer = new Uint32Array(4)): Int128 {
+        // Always parse numbers as strings - pulling out high and low bits
+        // directly seems to lose precision sometimes
+        // For example:
+        //     > -4613034156400212000 >>> 0
+        //     721782784
+        // The correct lower 32-bits are 721782752
+        return Int128.fromString(num.toString(), out_buffer);
+    }
+
     static fromString(str: string, out_buffer = new Uint32Array(4)): Int128 {
         // TODO: Assert that out_buffer is 0 and length = 4
         const negate = str.startsWith('-');
@@ -317,4 +402,13 @@ export class Int128 {
 
         return negate ? out.negate() : out;
     }
+
+    static convertArray(values: (string|number)[]): Uint32Array {
+        // TODO: Distinguish between string and number at compile-time
+        const data = new Uint32Array(values.length * 4);
+        for (let i = -1, n = values.length; ++i < n;) {
+            Int128.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 4 * 4 * i, 4));
+        }
+        return data;
+    }
 }
diff --git a/js/src/vector.ts b/js/src/vector.ts
index 8eb591b..b01f420 100644
--- a/js/src/vector.ts
+++ b/js/src/vector.ts
@@ -19,6 +19,7 @@ import { Data, ChunkedData, FlatData, BoolData, FlatListData, NestedData, Dictio
 import { VisitorNode, TypeVisitor, VectorVisitor } from './visitor';
 import { DataType, ListType, FlatType, NestedType, FlatListType, TimeUnit } from './type';
 import { IterableArrayLike, Precision, DateUnit, IntervalUnit, UnionMode } from './type';
+import * as IntUtil from './util/int';
 
 export interface VectorLike { length: number; nullCount: number; }
 
@@ -259,6 +260,19 @@ export class FloatVector<T extends Float = Float<any>> extends FlatVector<T> {
 }
 
 export class DateVector extends FlatVector<Date_> {
+    static from(data: Date[], unit: DateUnit = DateUnit.MILLISECOND): DateVector {
+        const type_ = new Date_(unit);
+        const converted =
+            unit === DateUnit.MILLISECOND ?
+            IntUtil.Int64.convertArray(data.map((d) => d.valueOf())) :
+            unit === DateUnit.DAY ? // floor (not Int32 truncation toward zero) so pre-1970 times map to the correct day
+            Int32Array.from(data.map((d) => Math.floor(d.valueOf() / 86400000))) :
+            undefined;
+        if (converted === undefined) {
+            throw new TypeError(`Unrecognized date unit "${DateUnit[unit]}"`);
+        }
+        return new DateVector(new FlatData(type_, data.length, null, converted));
+    }
     static defaultView<T extends Date_>(data: Data<T>) {
         return data.type.unit === DateUnit.DAY ? new DateDayView(data) : new DateMillisecondView(data, 2);
     }
@@ -279,6 +293,9 @@ export class DateVector extends FlatVector<Date_> {
         }
         throw new TypeError(`Unrecognized date unit "${DateUnit[this.type.unit]}"`);
     }
+    public indexOf(search: Date) {
+        return this.asEpochMilliseconds().indexOf(search.valueOf());
+    }
 }
 
 export class DecimalVector extends FlatVector<Decimal> {
diff --git a/js/test/unit/int-tests.ts b/js/test/unit/int-tests.ts
index fbd92e7..4214600 100644
--- a/js/test/unit/int-tests.ts
+++ b/js/test/unit/int-tests.ts
@@ -63,6 +63,15 @@ describe(`Uint64`, () => {
         let b = new Uint64(new Uint32Array([568, 32]));
         expect(a.lessThan(b)).toBeTruthy();
     });
+    test(`fromString parses string`, () => {
+        expect(Uint64.fromString('6789123456789')).toEqual(new Uint64(new Uint32Array([0xb74abf15, 0x62c])));
+    });
+    test(`fromString parses big (full unsigned 64-bit) string`, () => {
+        expect(Uint64.fromString('18364758544493064720')).toEqual(new Uint64(new Uint32Array([0x76543210, 0xfedcba98])));
+    });
+    test(`fromNumber converts 53-ish bit number`, () => {
+        expect(Uint64.fromNumber(8086463330923024)).toEqual(new Uint64(new Uint32Array([0x76543210, 0x001cba98])));
+    });
 });
 
 describe(`Int64`, () => {
@@ -150,6 +159,10 @@ describe(`Int64`, () => {
     test(`fromString parses negative string`, () => {
         expect(Int64.fromString('-6789123456789')).toEqual(new Int64(new Uint32Array([0x48b540eb, 0xfffff9d3])));
     });
+    test(`fromNumber converts 53-ish bit number`, () => {
+        expect(Int64.fromNumber(8086463330923024)).toEqual(new Int64(new Uint32Array([0x76543210, 0x001cba98])));
+        expect(Int64.fromNumber(-8086463330923024)).toEqual(new Int64(new Uint32Array([0x89abcdf0, 0xffe34567])));
+    });
 });
 
 describe(`Int128`, () => {
@@ -221,4 +234,8 @@ describe(`Int128`, () => {
                                                 0x0ffdccec,
                                                 0xf6b64f09])));
     });
+    test(`fromNumber converts 53-ish bit number`, () => {
+        expect(Int128.fromNumber(8086463330923024)).toEqual(new Int128(new Uint32Array([0x76543210, 0x001cba98, 0, 0])));
+        expect(Int128.fromNumber(-8086463330923024)).toEqual(new Int128(new Uint32Array([0x89abcdf0, 0xffe34567, 0xffffffff, 0xffffffff])));
+    });
 });
diff --git a/js/test/unit/vector-tests.ts b/js/test/unit/vector-tests.ts
index d25e0e9..7b46890 100644
--- a/js/test/unit/vector-tests.ts
+++ b/js/test/unit/vector-tests.ts
@@ -23,7 +23,7 @@ const utf8Encoder = new TextEncoder('utf-8');
 
 const { packBools } = Arrow.util;
 const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow.data;
-const { Vector, IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = Arrow.vector;
+const { Vector, IntVector, FloatVector, BoolVector, Utf8Vector, DateVector, DictionaryVector } = Arrow.vector;
 const {
     Dictionary, Utf8, Bool,
     Float16, Float32, Float64,
@@ -31,6 +31,8 @@ const {
     Uint8, Uint16, Uint32, Uint64,
 } = Arrow.type;
 
+const { DateUnit } = Arrow.enum_;
+
 const FixedSizeVectors = {
     Int64Vector: [IntVector, Int64] as [typeof IntVector, typeof Int64],
     Uint64Vector: [IntVector, Uint64] as [typeof IntVector, typeof Uint64],
@@ -317,6 +319,34 @@ describe(`Utf8Vector`, () => {
     });
 });
 
+describe(`DateVector`, () => {
+    const extras = [
+        new Date(2000, 0, 1),
+        new Date(1991, 5, 28, 12, 11, 10)
+    ];
+    describe(`unit = MILLISECOND`, () => {
+        const values = [
+            new Date(1989, 5, 22, 1, 2, 3),
+            new Date(1988, 3, 25, 4, 5, 6),
+            new Date(1987, 2, 24, 7, 8, 9),
+            new Date(2018, 4, 12, 17, 30, 0)
+        ];
+        const vector = DateVector.from(values);
+        basicVectorTests(vector, values, extras);
+    });
+    describe(`unit = DAY`, () => {
+        // Use UTC to ensure that dates are always at midnight
+        const values = [
+            new Date(Date.UTC(1989, 5, 22)),
+            new Date(Date.UTC(1988, 3, 25)),
+            new Date(Date.UTC(1987, 2, 24)),
+            new Date(Date.UTC(2018, 4, 12))
+        ];
+        const vector = DateVector.from(values, DateUnit.DAY);
+        basicVectorTests(vector, values, extras);
+    });
+});
+
 describe(`DictionaryVector`, () => {
     const dictionary = ['foo', 'bar', 'baz'];
     const extras = ['abc', '123']; // values to search for that should NOT be found