You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pt...@apache.org on 2018/07/09 02:00:37 UTC
[arrow] branch master updated: ARROW-2789: [JS] Add iterator to DataFrame
This is an automated email from the ASF dual-hosted git repository.
ptaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ddcb8f5 ARROW-2789: [JS] Add iterator to DataFrame
ddcb8f5 is described below
commit ddcb8f58301ae7376bac91f17c2c1ee37fd8ea95
Author: Brian Hulette <hu...@gmail.com>
AuthorDate: Sun Jul 8 18:59:49 2018 -0700
ARROW-2789: [JS] Add iterator to DataFrame
Add an iterator to `FilteredDataFrame` and the `DataFrame` interface
Author: Brian Hulette <hu...@gmail.com>
Closes #2215 from TheNeuralBit/dataframe-improvements and squashes the following commits:
dba0e618 <Brian Hulette> rename length getter back to count()
a43269f5 <Brian Hulette> fix tests
664a053c <Brian Hulette> Add iterator and readonly length member
---
js/src/table.ts | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
diff --git a/js/src/table.ts b/js/src/table.ts
index de06dd7..8144c98 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -29,10 +29,11 @@ export type NextFunc = (idx: number, batch: RecordBatch) => void;
export type BindFunc = (batch: RecordBatch) => void;
export interface DataFrame {
+ count(): number;
filter(predicate: Predicate): DataFrame;
scan(next: NextFunc, bind?: BindFunc): void;
- count(): number;
countBy(col: (Col|string)): CountByResult;
+ [Symbol.iterator](): IterableIterator<Struct['TValue']>;
}
export class Table implements DataFrame {
@@ -143,7 +144,6 @@ export class Table implements DataFrame {
}
}
}
- public count(): number { return this.length; }
public countBy(name: Col | string): CountByResult {
const batches = this.batches, numBatches = batches.length;
const count_by = typeof name === 'string' ? new Col(name) : name;
@@ -171,6 +171,9 @@ export class Table implements DataFrame {
}
return new CountByResult(vector.dictionary, IntVector.from(counts));
}
+ public count(): number {
+ return this.length;
+ }
public select(...columnNames: string[]) {
return new Table(this.batches.map((batch) => batch.select(...columnNames)));
}
@@ -239,6 +242,26 @@ class FilteredDataFrame implements DataFrame {
}
return sum;
}
+ public *[Symbol.iterator](): IterableIterator<Struct['TValue']> {
+ // inlined version of this:
+ // this.parent.scan((idx, columns) => {
+ // if (this.predicate(idx, columns)) next(idx, columns);
+ // });
+ const batches = this.batches;
+ const numBatches = batches.length;
+ for (let batchIndex = -1; ++batchIndex < numBatches;) {
+ // load batches
+ const batch = batches[batchIndex];
+ // TODO: bind batches lazily
+ // If predicate doesn't match anything in the batch we don't need
+ // to bind the callback
+ const predicate = this.predicate.bind(batch);
+ // yield all indices
+ for (let index = -1, numRows = batch.length; ++index < numRows;) {
+ if (predicate(index, batch)) { yield batch.get(index) as any; }
+ }
+ }
+ }
public filter(predicate: Predicate): DataFrame {
return new FilteredDataFrame(
this.batches,