You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by bh...@apache.org on 2019/02/26 15:34:16 UTC

[arrow] branch master updated: ARROW-4674: [JS] Update arrow2csv to new Row API

This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d0b369  ARROW-4674: [JS] Update arrow2csv to new Row API
1d0b369 is described below

commit 1d0b3697efee154f72b96df20155eb7e68ce6569
Author: ptaylor <pa...@me.com>
AuthorDate: Tue Feb 26 07:31:59 2019 -0800

    ARROW-4674: [JS] Update arrow2csv to new Row API
    
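    This PR updates the `arrow2csv` cell measurement to iterate over each Row's values instead of indexing up to `row.length`, now that a Row's `length` is keyed by a Symbol rather than exposed as a plain `length` property. Closes https://issues.apache.org/jira/browse/ARROW-4674.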
    
    Before:
    ```
           "row_id" | "0: Float64" | "1: Utf8"
                  0 | 3.141592653589793 |     "foo"
                  1 | 1.5707963267948966 |     "bar"
                  2 | 1.0471975511965976 |     "baz"
                  3 |          0.7 |    "bork"
    ```
    
    After:
    ```
           "row_id" |       "0: Float64" | "1: Utf8"
                  0 |  3.141592653589793 |     "foo"
                  1 | 1.5707963267948966 |     "bar"
                  2 | 1.0471975511965976 |     "baz"
                  3 |                0.7 |    "bork"
    ```
    
    Author: ptaylor <pa...@me.com>
    
    Closes #3747 from trxcllnt/js/arrow2csv-cell-measurement and squashes the following commits:
    
    31e572c1 <ptaylor> cleanup
    71f1a59c <ptaylor> make arrow2csv iterate the Row values
    36826895 <ptaylor> remove length from RowLike type
    62dff4a6 <ptaylor> ensure numbers/bigint don't use JSON.stringify()
---
 js/src/bin/arrow2csv.ts | 10 ++++++----
 js/src/type.ts          |  7 +++----
 js/src/util/pretty.ts   |  6 +++++-
 js/src/vector/map.ts    |  5 +++--
 js/src/vector/row.ts    |  1 -
 js/src/vector/struct.ts |  5 +++--
 6 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
index 397abb5..885f350 100644
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -134,11 +134,12 @@ function formatRow(row: string[] = [], maxColWidths: number[] = [], separator: s
 }
 
 function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: number[] = []) {
+    let val: any, j = 0;
     for (const row of batch) {
         if (!row) { continue; }
-        maxColWidths[0] = Math.max(maxColWidths[0] || 0, (`${rowId++}`).length);
-        for (let val: any, j = -1, k = row.length; ++j < k;) {
-            if (ArrayBuffer.isView(val = row[j]) && (typeof val[Symbol.toPrimitive] !== 'function')) {
+        maxColWidths[j = 0] = Math.max(maxColWidths[0] || 0, (`${rowId++}`).length);
+        for (val of row) {
+            if (val && typedArrayElementWidths.has(val.constructor) && (typeof val[Symbol.toPrimitive] !== 'function')) {
                 // If we're printing a column of TypedArrays, ensure the column is wide enough to accommodate
                 // the widest possible element for a given byte size, since JS omits leading zeroes. For example:
                 // 1 |  [1137743649,2170567488,244696391,2122556476]
@@ -149,7 +150,7 @@ function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: nu
                 // 6 |                                          null
                 // 7 |     [2755142991,4192423256,2994359,467878370]
                 const elementWidth = typedArrayElementWidths.get(val.constructor)!;
-
+    
                 maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0,
                     2 + // brackets on each end
                     (val.length - 1) + // commas between elements
@@ -158,6 +159,7 @@ function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: nu
             } else {
                 maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, valueToString(val).length);
             }
+            ++j;
         }
     }
     return maxColWidths;
diff --git a/js/src/type.ts b/js/src/type.ts
index 9ad15bc..34bc8e1 100644
--- a/js/src/type.ts
+++ b/js/src/type.ts
@@ -39,10 +39,9 @@ export type IntBitWidth = 8 | 16 | 32 | 64;
 export type IsSigned = { 'true': true; 'false': false };
 /** @ignore */
 export type RowLike<T extends { [key: string]: DataType }> =
-      { readonly length: number }
-    & ( Iterable<T[keyof T]['TValue']> )
-    & { [P in keyof T]: T[P]['TValue'] }
-    & { get<K extends keyof T>(key: K): T[K]['TValue']; }
+      ( Iterable<T[keyof T]['TValue'] | null> )
+    & { [P in keyof T]: T[P]['TValue'] | null }
+    & { get<K extends keyof T>(key: K): T[K]['TValue'] | null; }
     ;
 
 export interface DataType<TType extends Type = Type, TChildren extends { [key: string]: DataType } = any> {
diff --git a/js/src/util/pretty.ts b/js/src/util/pretty.ts
index f337e09..a189fc4 100644
--- a/js/src/util/pretty.ts
+++ b/js/src/util/pretty.ts
@@ -21,7 +21,11 @@
 export function valueToString(x: any) {
     if (x === null) { return 'null'; }
     if (x === undf) { return 'undefined'; }
-    if (typeof x === 'string') { return `"${x}"`; }
+    switch (typeof x) {
+        case 'number': return `${x}`;
+        case 'bigint': return `${x}`;
+        case 'string': return `"${x}"`;
+    }
     // If [Symbol.toPrimitive] is implemented (like in BN)
     // use it instead of JSON.stringify(). This ensures we
     // print BigInts, Decimals, and Binary in their native
diff --git a/js/src/vector/map.ts b/js/src/vector/map.ts
index 54956d7..0576e3b 100644
--- a/js/src/vector/map.ts
+++ b/js/src/vector/map.ts
@@ -15,14 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { RowProxyGenerator } from './row';
+import { Field } from '../schema';
 import { Vector } from '../vector';
 import { BaseVector } from './base';
+import { RowProxyGenerator } from './row';
 import { DataType, Map_, Struct } from '../type';
 
 export class MapVector<T extends { [key: string]: DataType } = any> extends BaseVector<Map_<T>> {
     public asStruct() {
-        return Vector.new(this.data.clone(new Struct<T>(this.type.children)));
+        return Vector.new(this.data.clone(new Struct<T>(this.type.children as Field<T[keyof T]>[])));
     }
     // @ts-ignore
     private _rowProxy: RowProxyGenerator<T>;
diff --git a/js/src/vector/row.ts b/js/src/vector/row.ts
index 54dcd7f..2608745 100644
--- a/js/src/vector/row.ts
+++ b/js/src/vector/row.ts
@@ -108,6 +108,5 @@ export class RowProxyGenerator<T extends { [key: string]: DataType }> {
         const bound = Object.create(this.rowPrototype);
         bound[kRowIndex] = rowIndex;
         return bound;
-        //return new this.RowProxy(rowIndex);
     }
 }
diff --git a/js/src/vector/struct.ts b/js/src/vector/struct.ts
index e1596d6..d503a0a 100644
--- a/js/src/vector/struct.ts
+++ b/js/src/vector/struct.ts
@@ -15,14 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { RowProxyGenerator } from './row';
+import { Field } from '../schema';
 import { Vector } from '../vector';
 import { BaseVector } from './base';
+import { RowProxyGenerator } from './row';
 import { DataType, Map_, Struct } from '../type';
 
 export class StructVector<T extends { [key: string]: DataType } = any> extends BaseVector<Struct<T>> {
     public asMap(keysSorted: boolean = false) {
-        return Vector.new(this.data.clone(new Map_<T>(this.type.children, keysSorted)));
+        return Vector.new(this.data.clone(new Map_<T>(this.type.children as Field<T[keyof T]>[], keysSorted)));
     }
     // @ts-ignore
     private _rowProxy: RowProxyGenerator<T>;