You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/09 22:31:35 UTC
[2/2] arrow git commit: ARROW-940: [JS] Generate multiple artifacts
ARROW-940: [JS] Generate multiple artifacts
Running `npm run build` now produces three sets of artifacts:
* `lib/`: CommonJS modules with typescript declarations
* `lib-esm/`: ES6 modules with typescript declarations
* `_bundles/`: minified and un-minified bundles with source maps for use in the browser
This PR also adds `.npmignore` and `bower.json` to get ready for packaging releases for both npm and bower.
Author: Brian Hulette <br...@ccri.com>
Closes #663 from TheNeuralBit/multiple-artifacts and squashes the following commits:
a056cd9 [Brian Hulette] update README
7779797 [Brian Hulette] add typescript dev dependency
895c95b [Brian Hulette] update npm main file
71aefb9 [Brian Hulette] Add bower config, add repo to npm config
0d47146 [Brian Hulette] updated read_file example
b01bd75 [Brian Hulette] JS lib now creates multiple artifacts: ES5/6 with .d.ts files, and bundles
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2d6453b2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2d6453b2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2d6453b2
Branch: refs/heads/master
Commit: 2d6453b25318b81af967f0cfdddacf183a60098c
Parents: 22c738c
Author: Brian Hulette <br...@ccri.com>
Authored: Tue May 9 18:31:28 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue May 9 18:31:28 2017 -0400
----------------------------------------------------------------------
js/.gitignore | 7 +-
js/.npmignore | 7 +
js/README.md | 13 +-
js/bin/arrow2csv.js | 13 +-
js/bin/arrow_schema.js | 2 +-
js/bower.json | 17 ++
js/examples/read_file.html | 12 +-
js/flatbuffers.sh | 14 +-
js/lib/Arrow_generated.d.ts | 5 -
js/lib/arrow.ts | 493 --------------------------------
js/lib/bitarray.ts | 42 ---
js/lib/types.ts | 589 ---------------------------------------
js/package.json | 10 +-
js/spec/arrow.js | 2 +-
js/src/Arrow_generated.d.ts | 5 +
js/src/arrow.ts | 493 ++++++++++++++++++++++++++++++++
js/src/bitarray.ts | 42 +++
js/src/types.ts | 589 +++++++++++++++++++++++++++++++++++++++
js/tsconfig.json | 14 +-
js/webpack.config.js | 38 ++-
20 files changed, 1236 insertions(+), 1171 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/.gitignore
----------------------------------------------------------------------
diff --git a/js/.gitignore b/js/.gitignore
index f67c1cc..ea5514f 100644
--- a/js/.gitignore
+++ b/js/.gitignore
@@ -1,6 +1,7 @@
-lib/*_generated.js
-dist
+src/Arrow_generated.js
+lib
+lib-esm
+_bundles
node_modules
-typings
.idea
*.iml
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/.npmignore
----------------------------------------------------------------------
diff --git a/js/.npmignore b/js/.npmignore
new file mode 100644
index 0000000..333aeec
--- /dev/null
+++ b/js/.npmignore
@@ -0,0 +1,7 @@
+.gitignore
+.npmignore
+src/
+spec/
+tsconfig.json
+webpack.config.js
+flatbuffers.sh
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/README.md
----------------------------------------------------------------------
diff --git a/js/README.md b/js/README.md
index cdabf54..167bafc 100644
--- a/js/README.md
+++ b/js/README.md
@@ -18,9 +18,8 @@ From this directory, run:
``` bash
$ npm install # pull dependencies
-$ tsc # build typescript
-$ webpack # bundle for the browser
-$ npm test # run unit tests
+$ npm run build # build typescript (run tsc and webpack)
+$ npm run test # run the unit tests (node.js only)
```
### Usage
@@ -30,17 +29,17 @@ The library is designed to be used with node.js or in the browser, this reposito
Import the arrow module:
``` js
-var arrow = require("arrow.js");
+var arrow = require("arrow");
```
See [bin/arrow_schema.js](bin/arrow_schema.js) and [bin/arrow2csv.js](bin/arrow2csv.js) for usage examples.
#### Browser
-Include `dist/arrow-bundle.js` in a `<script />` tag:
+Include `_bundles/arrow.js` in a `<script />` tag:
``` html
-<script src="arrow-bundle.js"/>
+<script src="_bundles/arrow.js"/>
```
-See [examples/read_file.html](examples/read_file.html) for a usage example - or try it out now at [theneuralbit.github.io/arrow](http://theneuralbit.github.io/arrow)
+See [examples/read_file.html](examples/read_file.html) for a usage example.
### API
##### `arrow.getReader(buffer)`
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/bin/arrow2csv.js
----------------------------------------------------------------------
diff --git a/js/bin/arrow2csv.js b/js/bin/arrow2csv.js
index 8122e95..c1801f7 100755
--- a/js/bin/arrow2csv.js
+++ b/js/bin/arrow2csv.js
@@ -19,7 +19,7 @@
var fs = require('fs')
var process = require('process');
-var arrow = require('../dist/arrow.js');
+var arrow = require('../lib/arrow.js');
var program = require('commander');
function list (val) {
@@ -39,10 +39,15 @@ if (!program.schema) {
var buf = fs.readFileSync(process.argv[process.argv.length - 1]);
var reader = arrow.getReader(buf);
-reader.loadNextBatch();
+var nrecords
-for (var i = 0; i < reader.getVector(program.schema[0]).length; i += 1|0) {
+nrecords = reader.loadNextBatch();
+while (nrecords > 0) {
+ for (var i = 0; i < nrecords; i += 1|0) {
console.log(program.schema.map(function (field) {
- return '' + reader.getVector(field).get(i);
+ return '' + reader.getVector(field).get(i);
}).join(','));
+ }
+ nrecords = reader.loadNextBatch();
+ if (nrecords > 0) console.log('---');
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/bin/arrow_schema.js
----------------------------------------------------------------------
diff --git a/js/bin/arrow_schema.js b/js/bin/arrow_schema.js
index 44dabb4..4917628 100755
--- a/js/bin/arrow_schema.js
+++ b/js/bin/arrow_schema.js
@@ -19,7 +19,7 @@
var fs = require('fs');
var process = require('process');
-var arrow = require('../dist/arrow.js');
+var arrow = require('../lib/arrow.js');
var buf = fs.readFileSync(process.argv[process.argv.length - 1]);
var reader = arrow.getReader(buf);
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/bower.json
----------------------------------------------------------------------
diff --git a/js/bower.json b/js/bower.json
new file mode 100644
index 0000000..c2099f8
--- /dev/null
+++ b/js/bower.json
@@ -0,0 +1,17 @@
+{
+ "name": "arrow",
+ "description": "",
+ "main": "_bundles/arrow.js",
+ "authors": [],
+ "license": "Apache-2.0",
+ "homepage": "http://arrow.apache.org",
+ "ignore": [
+ ".gitignore",
+ ".npmignore",
+ "src/",
+ "spec/",
+ "tsconfig.json",
+ "webpack.config.js",
+ "flatbuffers.sh"
+ ]
+}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/examples/read_file.html
----------------------------------------------------------------------
diff --git a/js/examples/read_file.html b/js/examples/read_file.html
index 933b142..5a650a0 100644
--- a/js/examples/read_file.html
+++ b/js/examples/read_file.html
@@ -39,12 +39,10 @@ function addCell (tr, type, name) {
tr.appendChild(td);
}
reader.onload = function (evt) {
- var buf = new Uint8Array(evt.target.result);
- var schema = arrow.loadSchemaFromStream(buf);
- var vectors = arrow.loadVectorsFromStream(buf);
- var length = vectors[schema[0].name].length;
+ var reader = new arrow.getReader(new Uint8Array(evt.target.result));
+ var schema = reader.getSchema();
+ var length = reader.loadNextBatch();
console.log(JSON.stringify(schema, null, '\t'));
-console.log(JSON.stringify(vectors, null, '\t'));
var thead = document.getElementById("thead");
var tbody = document.getElementById("tbody");
@@ -58,7 +56,7 @@ console.log(JSON.stringify(vectors, null, '\t'));
for (var i = 0; i < length; i += 1|0) {
var tr = document.createElement("tr");
- schema.forEach(function (d) { addCell(tr, "td", vectors[d.name].get(i)); });
+ schema.forEach(function (d) { addCell(tr, "td", reader.getVector(d.name).get(i)); });
tbody.appendChild(tr);
}
}
@@ -76,6 +74,6 @@ function handleFiles(files) {
<tbody id="tbody">
</tbody>
</table>
- <script type="text/javascript" src="../dist/arrow-bundle.js"></script>
+ <script type="text/javascript" src="../_bundles/arrow.js"></script>
</body>
</html>
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/flatbuffers.sh
----------------------------------------------------------------------
diff --git a/js/flatbuffers.sh b/js/flatbuffers.sh
index 99d2815..55967f8 100755
--- a/js/flatbuffers.sh
+++ b/js/flatbuffers.sh
@@ -13,7 +13,13 @@
# limitations under the License. See accompanying LICENSE file.
echo "Compiling flatbuffer schemas..."
-#flatc -o lib --js ../format/Message.fbs ../format/File.fbs
-flatc -o lib --js ../format/*.fbs
-rm -f lib/Arrow_generated.js
-cat lib/*_generated.js > lib/Arrow_generated.js
+mkdir -p lib lib-esm
+DIR=`mktemp -d`
+flatc -o $DIR --js ../format/*.fbs
+cat $DIR/*_generated.js > src/Arrow_generated.js
+
+# Duplicate in the tsc-generated outputs - we can't make tsc pull in .js files
+# and still produce declaration files
+cat $DIR/*_generated.js > lib/Arrow_generated.js
+cat $DIR/*_generated.js > lib-esm/Arrow_generated.js
+rm -rf $DIR
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/Arrow_generated.d.ts
----------------------------------------------------------------------
diff --git a/js/lib/Arrow_generated.d.ts b/js/lib/Arrow_generated.d.ts
deleted file mode 100644
index 1f5b454..0000000
--- a/js/lib/Arrow_generated.d.ts
+++ /dev/null
@@ -1,5 +0,0 @@
-export var org: {
- apache: {
- arrow: any
- }
-}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/arrow.ts
----------------------------------------------------------------------
diff --git a/js/lib/arrow.ts b/js/lib/arrow.ts
deleted file mode 100644
index 74def4d..0000000
--- a/js/lib/arrow.ts
+++ /dev/null
@@ -1,493 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import { flatbuffers } from 'flatbuffers';
-import { org } from './Arrow_generated';
-import { vectorFromField, Vector } from './types';
-
-import ByteBuffer = flatbuffers.ByteBuffer;
-var Footer = org.apache.arrow.flatbuf.Footer;
-var Message = org.apache.arrow.flatbuf.Message;
-var MessageHeader = org.apache.arrow.flatbuf.MessageHeader;
-var RecordBatch = org.apache.arrow.flatbuf.RecordBatch;
-var DictionaryBatch = org.apache.arrow.flatbuf.DictionaryBatch;
-var Schema = org.apache.arrow.flatbuf.Schema;
-var Type = org.apache.arrow.flatbuf.Type;
-var VectorType = org.apache.arrow.flatbuf.VectorType;
-
-export class ArrowReader {
-
- private bb;
- private schema: any = [];
- private vectors: Vector[];
- private vectorMap: any = {};
- private dictionaries: any = {};
- private batches: any = [];
- private batchIndex: number = 0;
-
- constructor(bb, schema, vectors: Vector[], batches, dictionaries) {
- this.bb = bb;
- this.schema = schema;
- this.vectors = vectors;
- for (var i = 0; i < vectors.length; i += 1|0) {
- this.vectorMap[vectors[i].name] = vectors[i]
- }
- this.batches = batches;
- this.dictionaries = dictionaries;
- }
-
- loadNextBatch() {
- if (this.batchIndex < this.batches.length) {
- var batch = this.batches[this.batchIndex];
- this.batchIndex += 1;
- loadVectors(this.bb, this.vectors, batch);
- return batch.length;
- } else {
- return 0;
- }
- }
-
- getSchema() {
- return this.schema;
- }
-
- getVectors() {
- return this.vectors;
- }
-
- getVector(name) {
- return this.vectorMap[name];
- }
-
- getBatchCount() {
- return this.batches.length;
- }
-
- // the index of the next batch to be loaded
- getBatchIndex() {
- return this.batchIndex;
- }
-
- // set the index of the next batch to be loaded
- setBatchIndex(i: number) {
- this.batchIndex = i;
- }
-}
-
-export function getSchema(buf) { return getReader(buf).getSchema(); }
-
-export function getReader(buf) : ArrowReader {
- if (_checkMagic(buf, 0)) {
- return getFileReader(buf);
- } else {
- return getStreamReader(buf);
- }
-}
-
-export function getStreamReader(buf) : ArrowReader {
- var bb = new ByteBuffer(buf);
-
- var schema = _loadSchema(bb),
- field,
- vectors: Vector[] = [],
- i,j,
- iLen,jLen,
- batch,
- recordBatches = [],
- dictionaryBatches = [],
- dictionaries = {};
-
- for (i = 0, iLen = schema.fieldsLength(); i < iLen; i += 1|0) {
- field = schema.fields(i);
- _createDictionaryVectors(field, dictionaries);
- vectors.push(vectorFromField(field, dictionaries));
- }
-
- while (bb.position() < bb.capacity()) {
- batch = _loadBatch(bb);
- if (batch == null) {
- break;
- } else if (batch.type == MessageHeader.DictionaryBatch) {
- dictionaryBatches.push(batch);
- } else if (batch.type == MessageHeader.RecordBatch) {
- recordBatches.push(batch)
- } else {
- console.error("Expected batch type" + MessageHeader.RecordBatch + " or " +
- MessageHeader.DictionaryBatch + " but got " + batch.type);
- }
- }
-
- // load dictionary vectors
- for (i = 0; i < dictionaryBatches.length; i += 1|0) {
- batch = dictionaryBatches[i];
- loadVectors(bb, [dictionaries[batch.id]], batch);
- }
-
- return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries);
-}
-
-export function getFileReader (buf) : ArrowReader {
- var bb = new ByteBuffer(buf);
-
- var footer = _loadFooter(bb);
-
- var schema = footer.schema();
- var i, len, field,
- vectors: Vector[] = [],
- block,
- batch,
- recordBatchBlocks = [],
- dictionaryBatchBlocks = [],
- dictionaries = {};
-
- for (i = 0, len = schema.fieldsLength(); i < len; i += 1|0) {
- field = schema.fields(i);
- _createDictionaryVectors(field, dictionaries);
- vectors.push(vectorFromField(field, dictionaries));
- }
-
- for (i = 0; i < footer.dictionariesLength(); i += 1|0) {
- block = footer.dictionaries(i);
- dictionaryBatchBlocks.push({
- offset: block.offset().low,
- metaDataLength: block.metaDataLength(),
- bodyLength: block.bodyLength().low,
- })
- }
-
- for (i = 0; i < footer.recordBatchesLength(); i += 1|0) {
- block = footer.recordBatches(i);
- recordBatchBlocks.push({
- offset: block.offset().low,
- metaDataLength: block.metaDataLength(),
- bodyLength: block.bodyLength().low,
- })
- }
-
- var dictionaryBatches = dictionaryBatchBlocks.map(function (block) {
- bb.setPosition(block.offset);
- // TODO: Make sure this is a dictionary batch
- return _loadBatch(bb);
- });
-
- var recordBatches = recordBatchBlocks.map(function (block) {
- bb.setPosition(block.offset);
- // TODO: Make sure this is a record batch
- return _loadBatch(bb);
- });
-
- // load dictionary vectors
- for (i = 0; i < dictionaryBatches.length; i += 1|0) {
- batch = dictionaryBatches[i];
- loadVectors(bb, [dictionaries[batch.id]], batch);
- }
-
- return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries);
-}
-
-function _loadFooter(bb) {
- var fileLength: number = bb.bytes_.length;
-
- if (fileLength < MAGIC.length*2 + 4) {
- console.error("file too small " + fileLength);
- return;
- }
-
- if (!_checkMagic(bb.bytes_, 0)) {
- console.error("missing magic bytes at beginning of file")
- return;
- }
-
- if (!_checkMagic(bb.bytes_, fileLength - MAGIC.length)) {
- console.error("missing magic bytes at end of file")
- return;
- }
-
- var footerLengthOffset: number = fileLength - MAGIC.length - 4;
- bb.setPosition(footerLengthOffset);
- var footerLength: number = Int32FromByteBuffer(bb, footerLengthOffset)
-
- if (footerLength <= 0 || footerLength + MAGIC.length*2 + 4 > fileLength) {
- console.log("Invalid footer length: " + footerLength)
- }
-
- var footerOffset: number = footerLengthOffset - footerLength;
- bb.setPosition(footerOffset);
- var footer = Footer.getRootAsFooter(bb);
-
- return footer;
-}
-
-function _loadSchema(bb) {
- var message =_loadMessage(bb);
- if (message.headerType() != MessageHeader.Schema) {
- console.error("Expected header type " + MessageHeader.Schema + " but got " + message.headerType());
- return;
- }
- return message.header(new Schema());
-}
-
-function _loadBatch(bb) {
- var message = _loadMessage(bb);
- if (message == null) {
- return;
- } else if (message.headerType() == MessageHeader.RecordBatch) {
- var batch = { header: message.header(new RecordBatch()), length: message.bodyLength().low }
- return _loadRecordBatch(bb, batch);
- } else if (message.headerType() == MessageHeader.DictionaryBatch) {
- var batch = { header: message.header(new DictionaryBatch()), length: message.bodyLength().low }
- return _loadDictionaryBatch(bb, batch);
- } else {
- console.error("Expected header type " + MessageHeader.RecordBatch + " or " + MessageHeader.DictionaryBatch +
- " but got " + message.headerType());
- return;
- }
-}
-
-function _loadRecordBatch(bb, batch) {
- var data = batch.header;
- var i, nodes_ = [], nodesLength = data.nodesLength();
- var buffer, buffers_ = [], buffersLength = data.buffersLength();
-
- for (i = 0; i < nodesLength; i += 1) {
- nodes_.push(data.nodes(i));
- }
- for (i = 0; i < buffersLength; i += 1) {
- buffer = data.buffers(i);
- buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low });
- }
- // position the buffer after the body to read the next message
- bb.setPosition(bb.position() + batch.length);
-
- return { nodes: nodes_, buffers: buffers_, length: data.length().low, type: MessageHeader.RecordBatch };
-}
-
-function _loadDictionaryBatch(bb, batch) {
- var id_ = batch.header.id().toFloat64().toString(), data = batch.header.data();
- var i, nodes_ = [], nodesLength = data.nodesLength();
- var buffer, buffers_ = [], buffersLength = data.buffersLength();
-
- for (i = 0; i < nodesLength; i += 1) {
- nodes_.push(data.nodes(i));
- }
- for (i = 0; i < buffersLength; i += 1) {
- buffer = data.buffers(i);
- buffers_.push({ offset: bb.position() + buffer.offset().low, length: buffer.length().low });
- }
- // position the buffer after the body to read the next message
- bb.setPosition(bb.position() + batch.length);
-
- return { id: id_, nodes: nodes_, buffers: buffers_, length: data.length().low, type: MessageHeader.DictionaryBatch };
-}
-
-function _loadMessage(bb) {
- var messageLength: number = Int32FromByteBuffer(bb, bb.position());
- if (messageLength == 0) {
- return;
- }
- bb.setPosition(bb.position() + 4);
- var message = Message.getRootAsMessage(bb);
- // position the buffer at the end of the message so it's ready to read further
- bb.setPosition(bb.position() + messageLength);
-
- return message;
-}
-
-function _createDictionaryVectors(field, dictionaries) {
- var encoding = field.dictionary();
- if (encoding != null) {
- var id = encoding.id().toFloat64().toString();
- if (dictionaries[id] == null) {
- // create a field for the dictionary
- var dictionaryField = _createDictionaryField(id, field);
- dictionaries[id] = vectorFromField(dictionaryField, null);
- }
- }
-
- // recursively examine child fields
- for (var i = 0, len = field.childrenLength(); i < len; i += 1|0) {
- _createDictionaryVectors(field.children(i), dictionaries);
- }
-}
-
-function _createDictionaryField(id, field) {
- var builder = new flatbuffers.Builder();
- var nameOffset = builder.createString("dict-" + id);
-
- var typeType = field.typeType();
- var typeOffset;
- if (typeType === Type.Int) {
- var type = field.type(new org.apache.arrow.flatbuf.Int());
- org.apache.arrow.flatbuf.Int.startInt(builder);
- org.apache.arrow.flatbuf.Int.addBitWidth(builder, type.bitWidth());
- org.apache.arrow.flatbuf.Int.addIsSigned(builder, type.isSigned());
- typeOffset = org.apache.arrow.flatbuf.Int.endInt(builder);
- } else if (typeType === Type.FloatingPoint) {
- var type = field.type(new org.apache.arrow.flatbuf.FloatingPoint());
- org.apache.arrow.flatbuf.FloatingPoint.startFloatingPoint(builder);
- org.apache.arrow.flatbuf.FloatingPoint.addPrecision(builder, type.precision());
- typeOffset = org.apache.arrow.flatbuf.FloatingPoint.endFloatingPoint(builder);
- } else if (typeType === Type.Utf8) {
- org.apache.arrow.flatbuf.Utf8.startUtf8(builder);
- typeOffset = org.apache.arrow.flatbuf.Utf8.endUtf8(builder);
- } else if (typeType === Type.Date) {
- var type = field.type(new org.apache.arrow.flatbuf.Date());
- org.apache.arrow.flatbuf.Date.startDate(builder);
- org.apache.arrow.flatbuf.Date.addUnit(builder, type.unit());
- typeOffset = org.apache.arrow.flatbuf.Date.endDate(builder);
- } else {
- throw "Unimplemented dictionary type " + typeType;
- }
- if (field.childrenLength() > 0) {
- throw "Dictionary encoded fields can't have children"
- }
- var childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, []);
-
- var layout, layoutOffsets = [];
- for (var i = 0, len = field.layoutLength(); i < len; i += 1|0) {
- layout = field.layout(i);
- org.apache.arrow.flatbuf.VectorLayout.startVectorLayout(builder);
- org.apache.arrow.flatbuf.VectorLayout.addBitWidth(builder, layout.bitWidth());
- org.apache.arrow.flatbuf.VectorLayout.addType(builder, layout.type());
- layoutOffsets.push(org.apache.arrow.flatbuf.VectorLayout.endVectorLayout(builder));
- }
- var layoutOffset = org.apache.arrow.flatbuf.Field.createLayoutVector(builder, layoutOffsets);
-
- org.apache.arrow.flatbuf.Field.startField(builder);
- org.apache.arrow.flatbuf.Field.addName(builder, nameOffset);
- org.apache.arrow.flatbuf.Field.addNullable(builder, field.nullable());
- org.apache.arrow.flatbuf.Field.addTypeType(builder, typeType);
- org.apache.arrow.flatbuf.Field.addType(builder, typeOffset);
- org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset);
- org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset);
- var offset = org.apache.arrow.flatbuf.Field.endField(builder);
- builder.finish(offset);
-
- return org.apache.arrow.flatbuf.Field.getRootAsField(builder.bb);
-}
-
-function Int32FromByteBuffer(bb, offset) {
- return ((bb.bytes_[offset + 3] & 255) << 24) |
- ((bb.bytes_[offset + 2] & 255) << 16) |
- ((bb.bytes_[offset + 1] & 255) << 8) |
- ((bb.bytes_[offset] & 255));
-}
-
-var MAGIC_STR = "ARROW1";
-var MAGIC = new Uint8Array(MAGIC_STR.length);
-for (var i = 0; i < MAGIC_STR.length; i += 1|0) {
- MAGIC[i] = MAGIC_STR.charCodeAt(i);
-}
-
-function _checkMagic(buf, index) {
- for (var i = 0; i < MAGIC.length; i += 1|0) {
- if (MAGIC[i] != buf[index + i]) {
- return false;
- }
- }
- return true;
-}
-
-var TYPEMAP = {}
-TYPEMAP[Type.NONE] = "NONE";
-TYPEMAP[Type.Null] = "Null";
-TYPEMAP[Type.Int] = "Int";
-TYPEMAP[Type.FloatingPoint] = "FloatingPoint";
-TYPEMAP[Type.Binary] = "Binary";
-TYPEMAP[Type.Utf8] = "Utf8";
-TYPEMAP[Type.Bool] = "Bool";
-TYPEMAP[Type.Decimal] = "Decimal";
-TYPEMAP[Type.Date] = "Date";
-TYPEMAP[Type.Time] = "Time";
-TYPEMAP[Type.Timestamp] = "Timestamp";
-TYPEMAP[Type.Interval] = "Interval";
-TYPEMAP[Type.List] = "List";
-TYPEMAP[Type.FixedSizeList] = "FixedSizeList";
-TYPEMAP[Type.Struct_] = "Struct";
-TYPEMAP[Type.Union] = "Union";
-
-var VECTORTYPEMAP = {};
-VECTORTYPEMAP[VectorType.OFFSET] = 'OFFSET';
-VECTORTYPEMAP[VectorType.DATA] = 'DATA';
-VECTORTYPEMAP[VectorType.VALIDITY] = 'VALIDITY';
-VECTORTYPEMAP[VectorType.TYPE] = 'TYPE';
-
-function parseField(field) {
- var children = [];
- for (var i = 0; i < field.childrenLength(); i += 1|0) {
- children.push(parseField(field.children(i)));
- }
-
- var layouts = [];
- for (var i = 0; i < field.layoutLength(); i += 1|0) {
- layouts.push(VECTORTYPEMAP[field.layout(i).type()]);
- }
-
- return {
- name: field.name(),
- nullable: field.nullable(),
- type: TYPEMAP[field.typeType()],
- children: children,
- layout: layouts
- };
-}
-
-function parseSchema(schema) {
- var result = [];
- var this_result, type;
- for (var i = 0, len = schema.fieldsLength(); i < len; i += 1|0) {
- result.push(parseField(schema.fields(i)));
- }
- return result;
-}
-
-function loadVectors(bb, vectors: Vector[], recordBatch) {
- var indices = { bufferIndex: 0, nodeIndex: 0 }, i;
- for (i = 0; i < vectors.length; i += 1) {
- loadVector(bb, vectors[i], recordBatch, indices);
- }
-}
-
-/**
- * Loads a vector with data from a batch
- * recordBatch: { nodes: org.apache.arrow.flatbuf.FieldNode[], buffers: { offset: number, length: number }[] }
- */
-function loadVector(bb, vector: Vector, recordBatch, indices) {
- var node = recordBatch.nodes[indices.nodeIndex], ownBuffersLength, ownBuffers = [], i;
- indices.nodeIndex += 1;
-
- // dictionary vectors are always ints, so will have a data vector plus optional null vector
- if (vector.field.dictionary() == null) {
- ownBuffersLength = vector.field.layoutLength();
- } else if (vector.field.nullable()) {
- ownBuffersLength = 2;
- } else {
- ownBuffersLength = 1;
- }
-
- for (i = 0; i < ownBuffersLength; i += 1) {
- ownBuffers.push(recordBatch.buffers[indices.bufferIndex + i]);
- }
- indices.bufferIndex += ownBuffersLength;
-
- vector.loadData(bb, node, ownBuffers);
-
- var children = vector.getChildVectors();
- for (i = 0; i < children.length; i++) {
- loadVector(bb, children[i], recordBatch, indices);
- }
-}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/bitarray.ts
----------------------------------------------------------------------
diff --git a/js/lib/bitarray.ts b/js/lib/bitarray.ts
deleted file mode 100644
index fc3c091..0000000
--- a/js/lib/bitarray.ts
+++ /dev/null
@@ -1,42 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-export class BitArray {
- private view: Uint8Array;
-
- constructor(buffer: ArrayBuffer, offset: number, length: number) {
- this.view = new Uint8Array(buffer, offset || 0, Math.ceil(length / 8));
- }
-
- get(i) {
- var index = (i >> 3) | 0; // | 0 converts to an int. Math.floor works too.
- var bit = i % 8; // i % 8 is just as fast as i & 7
- return (this.view[index] & (1 << bit)) !== 0;
- }
-
- set(i) {
- var index = (i >> 3) | 0;
- var bit = i % 8;
- this.view[index] |= 1 << bit;
- }
-
- unset(i) {
- var index = (i >> 3) | 0;
- var bit = i % 8;
- this.view[index] &= ~(1 << bit);
- }
-}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/lib/types.ts
----------------------------------------------------------------------
diff --git a/js/lib/types.ts b/js/lib/types.ts
deleted file mode 100644
index d656c6a..0000000
--- a/js/lib/types.ts
+++ /dev/null
@@ -1,589 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import { BitArray } from './bitarray';
-import { TextDecoder } from 'text-encoding';
-import { org } from './Arrow_generated';
-
-var Type = org.apache.arrow.flatbuf.Type;
-
-interface ArrayView {
- slice(start: number, end: number) : ArrayView
- toString() : string
-}
-
-export abstract class Vector {
- field: any;
- name: string;
- length: number;
- null_count: number;
-
- constructor(field) {
- this.field = field;
- this.name = field.name();
- }
-
- /* Access datum at index i */
- abstract get(i);
- /* Return array representing data in the range [start, end) */
- abstract slice(start: number, end: number);
- /* Return array of child vectors, for container types */
- abstract getChildVectors();
-
- /**
- * Use recordBatch fieldNodes and Buffers to construct this Vector
- * bb: flatbuffers.ByteBuffer
- * node: org.apache.arrow.flatbuf.FieldNode
- * buffers: { offset: number, length: number }[]
- */
- public loadData(bb, node, buffers) {
- this.length = node.length().low;
- this.null_count = node.nullCount().low;
- this.loadBuffers(bb, node, buffers);
- }
-
- protected abstract loadBuffers(bb, node, buffers);
-
- /**
- * Helper function for loading a VALIDITY buffer (for Nullable types)
- * bb: flatbuffers.ByteBuffer
- * buffer: org.apache.arrow.flatbuf.Buffer
- */
- static loadValidityBuffer(bb, buffer) : BitArray {
- var arrayBuffer = bb.bytes_.buffer;
- var offset = bb.bytes_.byteOffset + buffer.offset;
- return new BitArray(arrayBuffer, offset, buffer.length * 8);
- }
-
- /**
- * Helper function for loading an OFFSET buffer
- * buffer: org.apache.arrow.flatbuf.Buffer
- */
- static loadOffsetBuffer(bb, buffer) : Int32Array {
- var arrayBuffer = bb.bytes_.buffer;
- var offset = bb.bytes_.byteOffset + buffer.offset;
- var length = buffer.length / Int32Array.BYTES_PER_ELEMENT;
- return new Int32Array(arrayBuffer, offset, length);
- }
-
-}
-
-class SimpleVector<T extends ArrayView> extends Vector {
- protected dataView: T;
- private TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number };
-
- constructor (field, TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number }) {
- super(field);
- this.TypedArray = TypedArray;
- }
-
- getChildVectors() {
- return [];
- }
-
- get(i) {
- return this.dataView[i];
- }
-
- loadBuffers(bb, node, buffers) {
- this.loadDataBuffer(bb, buffers[0]);
- }
-
- /**
- * buffer: org.apache.arrow.flatbuf.Buffer
- */
- protected loadDataBuffer(bb, buffer) {
- var arrayBuffer = bb.bytes_.buffer;
- var offset = bb.bytes_.byteOffset + buffer.offset;
- var length = buffer.length / this.TypedArray.BYTES_PER_ELEMENT;
- this.dataView = new this.TypedArray(arrayBuffer, offset, length);
- }
-
- getDataView() {
- return this.dataView;
- }
-
- toString() {
- return this.dataView.toString();
- }
-
- slice(start, end) {
- return this.dataView.slice(start, end);
- }
-}
-
-class NullableSimpleVector<T extends ArrayView> extends SimpleVector<T> {
-
- protected validityView: BitArray;
-
- get(i: number) {
- if (this.validityView.get(i)) {
- return this.dataView[i];
- } else {
- return null;
- }
- }
-
- loadBuffers(bb, node, buffers) {
- this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
- this.loadDataBuffer(bb, buffers[1]);
- }
-
- getValidityVector() {
- return this.validityView;
- }
-}
-
-class Uint8Vector extends SimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; }
-class Uint16Vector extends SimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; }
-class Uint32Vector extends SimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; }
-class Int8Vector extends SimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; }
-class Int16Vector extends SimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; }
-class Int32Vector extends SimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; }
-class Float32Vector extends SimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; }
-class Float64Vector extends SimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; }
-
-class NullableUint8Vector extends NullableSimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; }
-class NullableUint16Vector extends NullableSimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; }
-class NullableUint32Vector extends NullableSimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; }
-class NullableInt8Vector extends NullableSimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); }; }
-class NullableInt16Vector extends NullableSimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); }; }
-class NullableInt32Vector extends NullableSimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); }; }
-class NullableFloat32Vector extends NullableSimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); }; }
-class NullableFloat64Vector extends NullableSimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); }; }
-
-class Uint64Vector extends SimpleVector<Uint32Array> {
- constructor(field) {
- super(field, Uint32Array);
- }
-
- get(i: number) {
- return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
- }
-}
-
-class NullableUint64Vector extends NullableSimpleVector<Uint32Array> {
- constructor(field) {
- super(field, Uint32Array);
- }
-
- get(i: number) {
- if (this.validityView.get(i)) {
- return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
- } else {
- return null;
- }
- }
-}
-
-class Int64Vector extends NullableSimpleVector<Uint32Array> {
- constructor(field) {
- super(field, Uint32Array);
- }
-
- get(i: number) {
- return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
- }
-}
-
-class NullableInt64Vector extends NullableSimpleVector<Uint32Array> {
- constructor(field) {
- super(field, Uint32Array);
- }
-
- get(i: number) {
- if (this.validityView.get(i)) {
- return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
- } else {
- return null;
- }
- }
-}
-
-class DateVector extends SimpleVector<Uint32Array> {
- constructor(field) {
- super(field, Uint32Array);
- }
-
- get (i) {
- return new Date(super.get(2*i+1)*Math.pow(2,32) + super.get(2*i));
- }
-}
-
-class NullableDateVector extends DateVector {
- private validityView: BitArray;
-
- loadBuffers(bb, node, buffers) {
- this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
- this.loadDataBuffer(bb, buffers[1]);
- }
-
- get (i) {
- if (this.validityView.get(i)) {
- return super.get(i);
- } else {
- return null;
- }
- }
-
- getValidityVector() {
- return this.validityView;
- }
-}
-
-class Utf8Vector extends SimpleVector<Uint8Array> {
- protected offsetView: Int32Array;
- static decoder: TextDecoder = new TextDecoder('utf8');
-
- constructor(field) {
- super(field, Uint8Array);
- }
-
- loadBuffers(bb, node, buffers) {
- this.offsetView = Vector.loadOffsetBuffer(bb, buffers[0]);
- this.loadDataBuffer(bb, buffers[1]);
- }
-
- get(i) {
- return Utf8Vector.decoder.decode(this.dataView.slice(this.offsetView[i], this.offsetView[i + 1]));
- }
-
- slice(start: number, end: number) {
- var result: string[] = [];
- for (var i: number = start; i < end; i += 1|0) {
- result.push(this.get(i));
- }
- return result;
- }
-
- getOffsetView() {
- return this.offsetView;
- }
-}
-
-class NullableUtf8Vector extends Utf8Vector {
- private validityView: BitArray;
-
- loadBuffers(bb, node, buffers) {
- this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
- this.offsetView = Vector.loadOffsetBuffer(bb, buffers[1]);
- this.loadDataBuffer(bb, buffers[2]);
- }
-
- get(i) {
- if (this.validityView.get(i)) {
- return super.get(i);
- } else {
- return null;
- }
- }
-
- getValidityVector() {
- return this.validityView;
- }
-}
-
-// Nested Types
-class ListVector extends Uint32Vector {
- private dataVector: Vector;
-
- constructor(field, dataVector: Vector) {
- super(field);
- this.dataVector = dataVector;
- }
-
- getChildVectors() {
- return [this.dataVector];
- }
-
- loadBuffers(bb, node, buffers) {
- super.loadBuffers(bb, node, buffers);
- this.length -= 1;
- }
-
- get(i) {
- var offset = super.get(i)
- if (offset === null) {
- return null;
- }
- var next_offset = super.get(i + 1)
- return this.dataVector.slice(offset, next_offset)
- }
-
- toString() {
- return "length: " + (this.length);
- }
-
- slice(start: number, end: number) {
- var result = [];
- for (var i = start; i < end; i += 1|0) {
- result.push(this.get(i));
- }
- return result;
- }
-}
-
-class NullableListVector extends ListVector {
- private validityView: BitArray;
-
- loadBuffers(bb, node, buffers) {
- this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
- this.loadDataBuffer(bb, buffers[1]);
- this.length -= 1;
- }
-
- get(i) {
- if (this.validityView.get(i)) {
- return super.get(i);
- } else {
- return null;
- }
- }
-
- getValidityVector() {
- return this.validityView;
- }
-}
-
-class FixedSizeListVector extends Vector {
- private size: number
- private dataVector: Vector;
-
- constructor(field, size: number, dataVector: Vector) {
- super(field);
- this.size = size;
- this.dataVector = dataVector;
- }
-
- getChildVectors() {
- return [this.dataVector];
- }
-
- loadBuffers(bb, node, buffers) {
- // no buffers to load
- }
-
- get(i: number) {
- return this.dataVector.slice(i * this.size, (i + 1) * this.size);
- }
-
- slice(start : number, end : number) {
- var result = [];
- for (var i = start; i < end; i += 1|0) {
- result.push(this.get(i));
- }
- return result;
- }
-
- getListSize() {
- return this.size;
- }
-}
-
-class NullableFixedSizeListVector extends FixedSizeListVector {
- private validityView: BitArray;
-
- loadBuffers(bb, node, buffers) {
- this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
- }
-
- get(i: number) {
- if (this.validityView.get(i)) {
- return super.get(i);
- } else {
- return null;
- }
- }
-
- getValidityVector() {
- return this.validityView;
- }
-}
-
-class StructVector extends Vector {
- private validityView: BitArray;
- private vectors: Vector[];
-
- constructor(field, vectors: Vector[]) {
- super(field);
- this.vectors = vectors;
- }
-
- getChildVectors() {
- return this.vectors;
- }
-
- loadBuffers(bb, node, buffers) {
- this.validityView = Vector.loadValidityBuffer(bb, buffers[0]);
- }
-
- get(i : number) {
- if (this.validityView.get(i)) {
- return this.vectors.map((v: Vector) => v.get(i));
- } else {
- return null;
- }
- }
-
- slice(start : number, end : number) {
- var result = [];
- for (var i = start; i < end; i += 1|0) {
- result.push(this.get(i));
- }
- return result;
- }
-
- getValidityVector() {
- return this.validityView;
- }
-}
-
-class DictionaryVector extends Vector {
-
- private indices: Vector;
- private dictionary: Vector;
-
- constructor (field, indices: Vector, dictionary: Vector) {
- super(field);
- this.indices = indices;
- this.dictionary = dictionary;
- }
-
- get(i) {
- var encoded = this.indices.get(i);
- if (encoded == null) {
- return null;
- } else {
- return this.dictionary.get(encoded);
- }
- }
-
- /** Get the dictionary encoded value */
- public getEncoded(i) {
- return this.indices.get(i);
- }
-
- slice(start, end) {
- return this.indices.slice(start, end); // TODO decode
- }
-
- getChildVectors() {
- return this.indices.getChildVectors();
- }
-
- loadBuffers(bb, node, buffers) {
- this.indices.loadData(bb, node, buffers);
- }
-
- /** Get the index (encoded) vector */
- public getIndexVector() {
- return this.indices;
- }
-
- /** Get the dictionary vector */
- public getDictionaryVector() {
- return this.dictionary;
- }
-
- toString() {
- return this.indices.toString();
- }
-}
-
-export function vectorFromField(field, dictionaries) : Vector {
- var dictionary = field.dictionary(), nullable = field.nullable();
- if (dictionary == null) {
- var typeType = field.typeType();
- if (typeType === Type.List) {
- var dataVector = vectorFromField(field.children(0), dictionaries);
- return nullable ? new NullableListVector(field, dataVector) : new ListVector(field, dataVector);
- } else if (typeType === Type.FixedSizeList) {
- var dataVector = vectorFromField(field.children(0), dictionaries);
- var size = field.type(new org.apache.arrow.flatbuf.FixedSizeList()).listSize();
- if (nullable) {
- return new NullableFixedSizeListVector(field, size, dataVector);
- } else {
- return new FixedSizeListVector(field, size, dataVector);
- }
- } else if (typeType === Type.Struct_) {
- var vectors : Vector[] = [];
- for (var i : number = 0; i < field.childrenLength(); i += 1|0) {
- vectors.push(vectorFromField(field.children(i), dictionaries));
- }
- return new StructVector(field, vectors);
- } else {
- if (typeType === Type.Int) {
- var type = field.type(new org.apache.arrow.flatbuf.Int());
- return _createIntVector(field, type.bitWidth(), type.isSigned(), nullable)
- } else if (typeType === Type.FloatingPoint) {
- var precision = field.type(new org.apache.arrow.flatbuf.FloatingPoint()).precision();
- if (precision == org.apache.arrow.flatbuf.Precision.SINGLE) {
- return nullable ? new NullableFloat32Vector(field) : new Float32Vector(field);
- } else if (precision == org.apache.arrow.flatbuf.Precision.DOUBLE) {
- return nullable ? new NullableFloat64Vector(field) : new Float64Vector(field);
- } else {
- throw "Unimplemented FloatingPoint precision " + precision;
- }
- } else if (typeType === Type.Utf8) {
- return nullable ? new NullableUtf8Vector(field) : new Utf8Vector(field);
- } else if (typeType === Type.Date) {
- return nullable ? new NullableDateVector(field) : new DateVector(field);
- } else {
- throw "Unimplemented type " + typeType;
- }
- }
- } else {
- // determine arrow type - default is signed 32 bit int
- var type = dictionary.indexType(), bitWidth = 32, signed = true;
- if (type != null) {
- bitWidth = type.bitWidth();
- signed = type.isSigned();
- }
- var indices = _createIntVector(field, bitWidth, signed, nullable);
- return new DictionaryVector(field, indices, dictionaries[dictionary.id().toFloat64().toString()]);
- }
-}
-
-function _createIntVector(field, bitWidth, signed, nullable) {
- if (bitWidth == 64) {
- if (signed) {
- return nullable ? new NullableInt64Vector(field) : new Int64Vector(field);
- } else {
- return nullable ? new NullableUint64Vector(field) : new Uint64Vector(field);
- }
- } else if (bitWidth == 32) {
- if (signed) {
- return nullable ? new NullableInt32Vector(field) : new Int32Vector(field);
- } else {
- return nullable ? new NullableUint32Vector(field) : new Uint32Vector(field);
- }
- } else if (bitWidth == 16) {
- if (signed) {
- return nullable ? new NullableInt16Vector(field) : new Int16Vector(field);
- } else {
- return nullable ? new NullableUint16Vector(field) : new Uint16Vector(field);
- }
- } else if (bitWidth == 8) {
- if (signed) {
- return nullable ? new NullableInt8Vector(field) : new Int8Vector(field);
- } else {
- return nullable ? new NullableUint8Vector(field) : new Uint8Vector(field);
- }
- } else {
- throw "Unimplemented Int bit width " + bitWidth;
- }
-}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/package.json
----------------------------------------------------------------------
diff --git a/js/package.json b/js/package.json
index 8687f50..1739e38 100644
--- a/js/package.json
+++ b/js/package.json
@@ -2,17 +2,21 @@
"name": "arrow",
"version": "0.0.0",
"description": "",
- "main": "dist/arrow.js",
+ "main": "lib/arrow.js",
"scripts": {
- "postinstall": "./flatbuffers.sh",
- "build": "./flatbuffers.sh && tsc && webpack",
+ "build": "./flatbuffers.sh && tsc && tsc -m es6 --outDir lib-esm && webpack",
+ "clean": "rm -rf lib lib-esm _bundles",
"test": "./node_modules/mocha/bin/mocha ./spec/arrow.js"
},
"author": "",
+ "repository": "https://github.com/apache/arrow/",
"license": "Apache-2.0",
"devDependencies": {
+ "awesome-typescript-loader": "^3.1.3",
"chai": "^3.5.0",
"mocha": "^3.3.0",
+ "typescript": "^2.3.2",
+ "uglifyjs-webpack-plugin": "^0.4.3",
"webpack": "^2.3.3"
},
"dependencies": {
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/spec/arrow.js
----------------------------------------------------------------------
diff --git a/js/spec/arrow.js b/js/spec/arrow.js
index 61a6f81..52c586b 100644
--- a/js/spec/arrow.js
+++ b/js/spec/arrow.js
@@ -19,7 +19,7 @@ var fs = require('fs');
var chai = require('chai');
var assert = chai.assert;
var path= require('path');
-var arrow = require('../dist/arrow.js');
+var arrow = require('../lib/arrow.js');
test_files = [
{
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/Arrow_generated.d.ts
----------------------------------------------------------------------
diff --git a/js/src/Arrow_generated.d.ts b/js/src/Arrow_generated.d.ts
new file mode 100644
index 0000000..1f5b454
--- /dev/null
+++ b/js/src/Arrow_generated.d.ts
@@ -0,0 +1,5 @@
+// Loose typing for the flatbuffers-generated Arrow module: everything below org.apache.arrow is untyped.
+export var org: { apache: { arrow: any } }
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/arrow.ts
----------------------------------------------------------------------
diff --git a/js/src/arrow.ts b/js/src/arrow.ts
new file mode 100644
index 0000000..74def4d
--- /dev/null
+++ b/js/src/arrow.ts
@@ -0,0 +1,493 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { flatbuffers } from 'flatbuffers';
+import { org } from './Arrow_generated';
+import { vectorFromField, Vector } from './types';
+
+import ByteBuffer = flatbuffers.ByteBuffer;
+var Footer = org.apache.arrow.flatbuf.Footer;
+var Message = org.apache.arrow.flatbuf.Message;
+var MessageHeader = org.apache.arrow.flatbuf.MessageHeader;
+var RecordBatch = org.apache.arrow.flatbuf.RecordBatch;
+var DictionaryBatch = org.apache.arrow.flatbuf.DictionaryBatch;
+var Schema = org.apache.arrow.flatbuf.Schema;
+var Type = org.apache.arrow.flatbuf.Type;
+var VectorType = org.apache.arrow.flatbuf.VectorType;
+
+/**
+ * Holds the parsed schema, the vectors built from it and the list of record
+ * batches, and loads the batches into the vectors one at a time.
+ */
+export class ArrowReader {
+
+    private bb;
+    private schema: any = [];
+    private vectors: Vector[];
+    private vectorMap: any = {};
+    private dictionaries: any = {};
+    private batches: any = [];
+    private batchIndex: number = 0;
+
+    constructor(bb, schema, vectors: Vector[], batches, dictionaries) {
+        this.bb = bb;
+        this.schema = schema;
+        this.vectors = vectors;
+        this.batches = batches;
+        this.dictionaries = dictionaries;
+        // index the vectors by name so getVector() is a plain lookup
+        vectors.forEach((vector) => {
+            this.vectorMap[vector.name] = vector;
+        });
+    }
+
+    /**
+     * Load the batch at the current batch index into the vectors and advance
+     * the index. Returns the length of the loaded batch, or 0 when there are
+     * no batches left.
+     */
+    loadNextBatch() {
+        if (this.batchIndex >= this.batches.length) {
+            return 0;
+        }
+        var next = this.batches[this.batchIndex];
+        this.batchIndex += 1;
+        loadVectors(this.bb, this.vectors, next);
+        return next.length;
+    }
+
+    /* The schema object this reader was constructed with */
+    getSchema() {
+        return this.schema;
+    }
+
+    /* All top-level vectors, in the order they were passed in */
+    getVectors() {
+        return this.vectors;
+    }
+
+    /* Look up a top-level vector by its field name */
+    getVector(name) {
+        return this.vectorMap[name];
+    }
+
+    /* Number of record batches available */
+    getBatchCount() {
+        return this.batches.length;
+    }
+
+    /* Index of the batch that the next loadNextBatch() call will load */
+    getBatchIndex() {
+        return this.batchIndex;
+    }
+
+    /* Choose which batch the next loadNextBatch() call will load */
+    setBatchIndex(i: number) {
+        this.batchIndex = i;
+    }
+}
+
+/* Build a reader for buf and return the schema it parsed */
+export function getSchema(buf) {
+    var reader = getReader(buf);
+    return reader.getSchema();
+}
+
+/**
+ * Create a reader for buf: a buffer that starts with the magic bytes goes
+ * to the file reader, anything else goes to the stream reader.
+ */
+export function getReader(buf) : ArrowReader {
+    return _checkMagic(buf, 0) ? getFileReader(buf) : getStreamReader(buf);
+}
+
+/**
+ * Build an ArrowReader from a buffer that starts with a schema message
+ * followed by dictionary and record batch messages.
+ *
+ * One vector is created per schema field (plus one per referenced
+ * dictionary). Messages are then read until the buffer is exhausted or
+ * _loadBatch returns nothing. Dictionary batches are loaded into their
+ * dictionary vectors before returning; record batches are handed to the
+ * reader, which loads them on demand.
+ *
+ * buf: passed to the flatbuffers.ByteBuffer constructor
+ */
+export function getStreamReader(buf) : ArrowReader {
+    var bb = new ByteBuffer(buf);
+
+    var schema = _loadSchema(bb),
+        field,
+        vectors: Vector[] = [],
+        i, iLen,
+        batch,
+        recordBatches = [],
+        dictionaryBatches = [],
+        dictionaries = {};
+
+    for (i = 0, iLen = schema.fieldsLength(); i < iLen; i += 1|0) {
+        field = schema.fields(i);
+        _createDictionaryVectors(field, dictionaries);
+        vectors.push(vectorFromField(field, dictionaries));
+    }
+
+    while (bb.position() < bb.capacity()) {
+        batch = _loadBatch(bb);
+        if (batch == null) {
+            break;
+        } else if (batch.type == MessageHeader.DictionaryBatch) {
+            dictionaryBatches.push(batch);
+        } else if (batch.type == MessageHeader.RecordBatch) {
+            recordBatches.push(batch);
+        } else {
+            // trailing space after "type" keeps the enum value from running into the word
+            console.error("Expected batch type " + MessageHeader.RecordBatch + " or " +
+                          MessageHeader.DictionaryBatch + " but got " + batch.type);
+        }
+    }
+
+    // load dictionary vectors
+    for (i = 0; i < dictionaryBatches.length; i += 1|0) {
+        batch = dictionaryBatches[i];
+        loadVectors(bb, [dictionaries[batch.id]], batch);
+    }
+
+    return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries);
+}
+
+/**
+ * Build an ArrowReader from a buffer that ends with a footer listing the
+ * schema and the location of every dictionary and record batch.
+ *
+ * Vectors are created from the footer's schema, every listed batch is
+ * parsed from its recorded offset, and dictionary batches are loaded into
+ * their dictionary vectors before returning.
+ *
+ * buf: passed to the flatbuffers.ByteBuffer constructor
+ * Throws an Error when the footer cannot be loaded.
+ */
+export function getFileReader (buf) : ArrowReader {
+    var bb = new ByteBuffer(buf);
+
+    var footer = _loadFooter(bb);
+    if (footer == null) {
+        // _loadFooter has already logged the specific reason; fail with a
+        // clear message instead of a TypeError on footer.schema()
+        throw new Error("Unable to load Arrow file footer");
+    }
+
+    var schema = footer.schema();
+    var i, len, field,
+        vectors: Vector[] = [],
+        block,
+        batch,
+        recordBatchBlocks = [],
+        dictionaryBatchBlocks = [],
+        dictionaries = {};
+
+    for (i = 0, len = schema.fieldsLength(); i < len; i += 1|0) {
+        field = schema.fields(i);
+        _createDictionaryVectors(field, dictionaries);
+        vectors.push(vectorFromField(field, dictionaries));
+    }
+
+    for (i = 0; i < footer.dictionariesLength(); i += 1|0) {
+        block = footer.dictionaries(i);
+        dictionaryBatchBlocks.push({
+            offset: block.offset().low,
+            metaDataLength: block.metaDataLength(),
+            bodyLength: block.bodyLength().low,
+        });
+    }
+
+    for (i = 0; i < footer.recordBatchesLength(); i += 1|0) {
+        block = footer.recordBatches(i);
+        recordBatchBlocks.push({
+            offset: block.offset().low,
+            metaDataLength: block.metaDataLength(),
+            bodyLength: block.bodyLength().low,
+        });
+    }
+
+    var dictionaryBatches = dictionaryBatchBlocks.map(function (block) {
+        bb.setPosition(block.offset);
+        // TODO: Make sure this is a dictionary batch
+        return _loadBatch(bb);
+    });
+
+    var recordBatches = recordBatchBlocks.map(function (block) {
+        bb.setPosition(block.offset);
+        // TODO: Make sure this is a record batch
+        return _loadBatch(bb);
+    });
+
+    // load dictionary vectors
+    for (i = 0; i < dictionaryBatches.length; i += 1|0) {
+        batch = dictionaryBatches[i];
+        loadVectors(bb, [dictionaries[batch.id]], batch);
+    }
+
+    return new ArrowReader(bb, parseSchema(schema), vectors, recordBatches, dictionaries);
+}
+
+/**
+ * Locate and parse the footer of a file-format buffer.
+ *
+ * Expected layout at the end of the buffer: footer bytes, a 4-byte
+ * little-endian footer length, then the magic bytes. The magic bytes must
+ * also appear at the very start.
+ *
+ * bb: flatbuffers.ByteBuffer
+ * Returns the Footer, or undefined (after logging the reason) when any
+ * validation step fails.
+ */
+function _loadFooter(bb) {
+    var fileLength: number = bb.bytes_.length;
+
+    if (fileLength < MAGIC.length*2 + 4) {
+        console.error("file too small " + fileLength);
+        return;
+    }
+
+    if (!_checkMagic(bb.bytes_, 0)) {
+        console.error("missing magic bytes at beginning of file");
+        return;
+    }
+
+    if (!_checkMagic(bb.bytes_, fileLength - MAGIC.length)) {
+        console.error("missing magic bytes at end of file");
+        return;
+    }
+
+    var footerLengthOffset: number = fileLength - MAGIC.length - 4;
+    bb.setPosition(footerLengthOffset);
+    var footerLength: number = Int32FromByteBuffer(bb, footerLengthOffset);
+
+    if (footerLength <= 0 || footerLength + MAGIC.length*2 + 4 > fileLength) {
+        // bail out like the other checks: parsing from an offset derived
+        // from a bad length would read unrelated bytes
+        console.error("Invalid footer length: " + footerLength);
+        return;
+    }
+
+    var footerOffset: number = footerLengthOffset - footerLength;
+    bb.setPosition(footerOffset);
+    var footer = Footer.getRootAsFooter(bb);
+
+    return footer;
+}
+
+/**
+ * Read the next message from bb and return its header as a Schema.
+ * Logs an error and returns undefined when the message is of another type.
+ */
+function _loadSchema(bb) {
+    var message = _loadMessage(bb);
+    if (message.headerType() == MessageHeader.Schema) {
+        return message.header(new Schema());
+    }
+    console.error("Expected header type " + MessageHeader.Schema + " but got " + message.headerType());
+    return;
+}
+
+/**
+ * Read the next message from bb and parse it as a record batch or a
+ * dictionary batch. Returns undefined when there is no message, or (after
+ * logging) when the message has any other header type.
+ */
+function _loadBatch(bb) {
+    var message = _loadMessage(bb);
+    if (message == null) {
+        return;
+    }
+
+    var headerType = message.headerType();
+    if (headerType == MessageHeader.RecordBatch) {
+        return _loadRecordBatch(bb, {
+            header: message.header(new RecordBatch()),
+            length: message.bodyLength().low
+        });
+    }
+    if (headerType == MessageHeader.DictionaryBatch) {
+        return _loadDictionaryBatch(bb, {
+            header: message.header(new DictionaryBatch()),
+            length: message.bodyLength().low
+        });
+    }
+
+    console.error("Expected header type " + MessageHeader.RecordBatch + " or " + MessageHeader.DictionaryBatch +
+                  " but got " + headerType);
+    return;
+}
+
+/**
+ * Collect the field nodes and buffer locations of a record batch and move
+ * bb past the batch body.
+ *
+ * batch: { header: RecordBatch, length: body length in bytes }
+ * Buffer offsets are stored relative to bb's position on entry.
+ */
+function _loadRecordBatch(bb, batch) {
+    var header = batch.header;
+    var bodyStart = bb.position();
+    var fieldNodes = [], bufferSpecs = [];
+    var n, count, buf;
+
+    for (n = 0, count = header.nodesLength(); n < count; n++) {
+        fieldNodes.push(header.nodes(n));
+    }
+    for (n = 0, count = header.buffersLength(); n < count; n++) {
+        buf = header.buffers(n);
+        bufferSpecs.push({ offset: bodyStart + buf.offset().low, length: buf.length().low });
+    }
+
+    // position the buffer after the body to read the next message
+    bb.setPosition(bb.position() + batch.length);
+
+    return {
+        nodes: fieldNodes,
+        buffers: bufferSpecs,
+        length: header.length().low,
+        type: MessageHeader.RecordBatch
+    };
+}
+
+/**
+ * Collect the dictionary id, field nodes and buffer locations of a
+ * dictionary batch and move bb past the batch body.
+ *
+ * batch: { header: DictionaryBatch, length: body length in bytes }
+ * The id is returned as a string so it can be used as an object key.
+ */
+function _loadDictionaryBatch(bb, batch) {
+    var dictionaryId = batch.header.id().toFloat64().toString();
+    var payload = batch.header.data();
+    var bodyStart = bb.position();
+    var fieldNodes = [], bufferSpecs = [];
+    var n, count, buf;
+
+    for (n = 0, count = payload.nodesLength(); n < count; n++) {
+        fieldNodes.push(payload.nodes(n));
+    }
+    for (n = 0, count = payload.buffersLength(); n < count; n++) {
+        buf = payload.buffers(n);
+        bufferSpecs.push({ offset: bodyStart + buf.offset().low, length: buf.length().low });
+    }
+
+    // position the buffer after the body to read the next message
+    bb.setPosition(bb.position() + batch.length);
+
+    return {
+        id: dictionaryId,
+        nodes: fieldNodes,
+        buffers: bufferSpecs,
+        length: payload.length().low,
+        type: MessageHeader.DictionaryBatch
+    };
+}
+
+/**
+ * Read one length-prefixed Message at bb's current position.
+ * Returns undefined when the 4-byte length prefix is zero; otherwise
+ * returns the Message and leaves bb positioned just past it.
+ */
+function _loadMessage(bb) {
+    var prefixOffset = bb.position();
+    var messageLength: number = Int32FromByteBuffer(bb, prefixOffset);
+    if (messageLength == 0) {
+        return;
+    }
+
+    // skip the length prefix, then parse the message that follows it
+    bb.setPosition(prefixOffset + 4);
+    var message = Message.getRootAsMessage(bb);
+
+    // position the buffer at the end of the message so it's ready to read further
+    bb.setPosition(bb.position() + messageLength);
+    return message;
+}
+
+/**
+ * Make sure `dictionaries` holds a vector for the dictionary referenced by
+ * field (keyed by the dictionary id as a string), then do the same for
+ * every child field.
+ */
+function _createDictionaryVectors(field, dictionaries) {
+    var encoding = field.dictionary();
+    if (encoding != null) {
+        var id = encoding.id().toFloat64().toString();
+        if (dictionaries[id] == null) {
+            // the dictionary's values get their own synthetic field and vector
+            dictionaries[id] = vectorFromField(_createDictionaryField(id, field), null);
+        }
+    }
+
+    // recursively examine child fields
+    var childCount = field.childrenLength();
+    for (var c = 0; c < childCount; c++) {
+        _createDictionaryVectors(field.children(c), dictionaries);
+    }
+}
+
+/**
+ * Build a flatbuffers Field named "dict-<id>" describing the values of a
+ * dictionary: same type, nullability and layout as `field`, no children
+ * and no dictionary encoding of its own.
+ *
+ * Throws when the field's type is not Int, FloatingPoint, Utf8 or Date,
+ * or when the field has children.
+ */
+function _createDictionaryField(id, field) {
+    var fb = org.apache.arrow.flatbuf;
+    var builder = new flatbuffers.Builder();
+    var nameOffset = builder.createString("dict-" + id);
+
+    // copy the type table of the source field
+    var typeType = field.typeType();
+    var typeOffset, type;
+    if (typeType === Type.Int) {
+        type = field.type(new fb.Int());
+        fb.Int.startInt(builder);
+        fb.Int.addBitWidth(builder, type.bitWidth());
+        fb.Int.addIsSigned(builder, type.isSigned());
+        typeOffset = fb.Int.endInt(builder);
+    } else if (typeType === Type.FloatingPoint) {
+        type = field.type(new fb.FloatingPoint());
+        fb.FloatingPoint.startFloatingPoint(builder);
+        fb.FloatingPoint.addPrecision(builder, type.precision());
+        typeOffset = fb.FloatingPoint.endFloatingPoint(builder);
+    } else if (typeType === Type.Utf8) {
+        fb.Utf8.startUtf8(builder);
+        typeOffset = fb.Utf8.endUtf8(builder);
+    } else if (typeType === Type.Date) {
+        type = field.type(new fb.Date());
+        fb.Date.startDate(builder);
+        fb.Date.addUnit(builder, type.unit());
+        typeOffset = fb.Date.endDate(builder);
+    } else {
+        throw "Unimplemented dictionary type " + typeType;
+    }
+
+    if (field.childrenLength() > 0) {
+        throw "Dictionary encoded fields can't have children"
+    }
+    var childrenOffset = fb.Field.createChildrenVector(builder, []);
+
+    // copy every layout entry of the source field
+    var layoutOffsets = [];
+    var layoutCount = field.layoutLength();
+    for (var l = 0; l < layoutCount; l++) {
+        var sourceLayout = field.layout(l);
+        fb.VectorLayout.startVectorLayout(builder);
+        fb.VectorLayout.addBitWidth(builder, sourceLayout.bitWidth());
+        fb.VectorLayout.addType(builder, sourceLayout.type());
+        layoutOffsets.push(fb.VectorLayout.endVectorLayout(builder));
+    }
+    var layoutOffset = fb.Field.createLayoutVector(builder, layoutOffsets);
+
+    // all nested objects are finished; assemble the Field table itself
+    fb.Field.startField(builder);
+    fb.Field.addName(builder, nameOffset);
+    fb.Field.addNullable(builder, field.nullable());
+    fb.Field.addTypeType(builder, typeType);
+    fb.Field.addType(builder, typeOffset);
+    fb.Field.addChildren(builder, childrenOffset);
+    fb.Field.addLayout(builder, layoutOffset);
+    builder.finish(fb.Field.endField(builder));
+
+    return fb.Field.getRootAsField(builder.bb);
+}
+
+/**
+ * Assemble a 32-bit integer from the four bytes of bb.bytes_ starting at
+ * offset, least significant byte first.
+ */
+function Int32FromByteBuffer(bb, offset) {
+    var bytes = bb.bytes_;
+    var b0 = bytes[offset] & 255;
+    var b1 = bytes[offset + 1] & 255;
+    var b2 = bytes[offset + 2] & 255;
+    var b3 = bytes[offset + 3] & 255;
+    return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
+}
+
+// The magic marker as a string, and as the byte values of its characters
+var MAGIC_STR = "ARROW1";
+var MAGIC = new Uint8Array(MAGIC_STR.length);
+MAGIC_STR.split("").forEach(function (ch, position) {
+    MAGIC[position] = ch.charCodeAt(0);
+});
+
+/* True when the bytes of buf starting at index equal the MAGIC bytes */
+function _checkMagic(buf, index) {
+    var matched = 0;
+    while (matched < MAGIC.length && MAGIC[matched] == buf[index + matched]) {
+        matched += 1;
+    }
+    return matched === MAGIC.length;
+}
+
+// Display name for each flatbuf Type enum value
+var TYPEMAP = {
+    [Type.NONE]: "NONE",
+    [Type.Null]: "Null",
+    [Type.Int]: "Int",
+    [Type.FloatingPoint]: "FloatingPoint",
+    [Type.Binary]: "Binary",
+    [Type.Utf8]: "Utf8",
+    [Type.Bool]: "Bool",
+    [Type.Decimal]: "Decimal",
+    [Type.Date]: "Date",
+    [Type.Time]: "Time",
+    [Type.Timestamp]: "Timestamp",
+    [Type.Interval]: "Interval",
+    [Type.List]: "List",
+    [Type.FixedSizeList]: "FixedSizeList",
+    [Type.Struct_]: "Struct",
+    [Type.Union]: "Union"
+};
+
+// Display name for each flatbuf VectorType enum value
+var VECTORTYPEMAP = {
+    [VectorType.OFFSET]: 'OFFSET',
+    [VectorType.DATA]: 'DATA',
+    [VectorType.VALIDITY]: 'VALIDITY',
+    [VectorType.TYPE]: 'TYPE'
+};
+
+/**
+ * Convert a flatbuf Field into a plain object:
+ * { name, nullable, type (display name), children (parsed recursively),
+ *   layout (display name of each layout entry's vector type) }
+ */
+function parseField(field) {
+    var parsedChildren = [];
+    var childCount = field.childrenLength();
+    for (var c = 0; c < childCount; c++) {
+        parsedChildren.push(parseField(field.children(c)));
+    }
+
+    var layoutNames = [];
+    var layoutCount = field.layoutLength();
+    for (var l = 0; l < layoutCount; l++) {
+        layoutNames.push(VECTORTYPEMAP[field.layout(l).type()]);
+    }
+
+    return {
+        name: field.name(),
+        nullable: field.nullable(),
+        type: TYPEMAP[field.typeType()],
+        children: parsedChildren,
+        layout: layoutNames
+    };
+}
+
+/**
+ * Convert a flatbuf Schema into an array of plain field descriptions,
+ * one parseField() result per top-level field, in schema order.
+ */
+function parseSchema(schema) {
+    var result = [];
+    for (var i = 0, len = schema.fieldsLength(); i < len; i += 1|0) {
+        result.push(parseField(schema.fields(i)));
+    }
+    return result;
+}
+
+/**
+ * Load every vector (and, through loadVector, its children) from one batch.
+ * A single cursor object is shared across the calls so each vector picks
+ * up the nodes and buffers that follow the previous vector's.
+ */
+function loadVectors(bb, vectors: Vector[], recordBatch) {
+    var cursor = { bufferIndex: 0, nodeIndex: 0 };
+    vectors.forEach(function (vector) {
+        loadVector(bb, vector, recordBatch, cursor);
+    });
+}
+
+/**
+ * Load one vector, then its child vectors, from a batch.
+ * recordBatch: { nodes: org.apache.arrow.flatbuf.FieldNode[], buffers: { offset: number, length: number }[] }
+ * indices: { nodeIndex, bufferIndex } cursor into recordBatch, advanced in place
+ */
+function loadVector(bb, vector: Vector, recordBatch, indices) {
+    var node = recordBatch.nodes[indices.nodeIndex];
+    indices.nodeIndex += 1;
+
+    // dictionary vectors are always ints, so will have a data vector plus optional null vector
+    var bufferCount;
+    if (vector.field.dictionary() != null) {
+        bufferCount = vector.field.nullable() ? 2 : 1;
+    } else {
+        bufferCount = vector.field.layoutLength();
+    }
+
+    // take this vector's buffers and move the cursor past them
+    var ownBuffers = [];
+    for (var b = 0; b < bufferCount; b++) {
+        ownBuffers.push(recordBatch.buffers[indices.bufferIndex + b]);
+    }
+    indices.bufferIndex += bufferCount;
+
+    vector.loadData(bb, node, ownBuffers);
+
+    // children consume the nodes and buffers that follow their parent's
+    var children = vector.getChildVectors();
+    for (var c = 0; c < children.length; c++) {
+        loadVector(bb, children[c], recordBatch, indices);
+    }
+}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/bitarray.ts
----------------------------------------------------------------------
diff --git a/js/src/bitarray.ts b/js/src/bitarray.ts
new file mode 100644
index 0000000..fc3c091
--- /dev/null
+++ b/js/src/bitarray.ts
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/**
+ * Bit-addressable view over a region of an ArrayBuffer.
+ * Bit i is stored in byte (i >> 3) at bit position (i % 8), counting from
+ * the least-significant bit of that byte.
+ */
+export class BitArray {
+    private view: Uint8Array;
+
+    /**
+     * buffer: backing ArrayBuffer
+     * offset: byte offset of the first byte of the bitmap (falsy values mean 0)
+     * length: number of bits; rounded up to whole bytes for the view
+     */
+    constructor(buffer: ArrayBuffer, offset: number, length: number) {
+        var byteCount = Math.ceil(length / 8);
+        this.view = new Uint8Array(buffer, offset || 0, byteCount);
+    }
+
+    /* Return true when bit i is set */
+    get(i) {
+        var mask = 1 << (i % 8);
+        return (this.view[i >> 3] & mask) !== 0;
+    }
+
+    /* Turn bit i on */
+    set(i) {
+        var mask = 1 << (i % 8);
+        this.view[i >> 3] |= mask;
+    }
+
+    /* Turn bit i off */
+    unset(i) {
+        var mask = 1 << (i % 8);
+        this.view[i >> 3] &= ~mask;
+    }
+}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/src/types.ts
----------------------------------------------------------------------
diff --git a/js/src/types.ts b/js/src/types.ts
new file mode 100644
index 0000000..d656c6a
--- /dev/null
+++ b/js/src/types.ts
@@ -0,0 +1,589 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { BitArray } from './bitarray';
+import { TextDecoder } from 'text-encoding';
+import { org } from './Arrow_generated';
+
+var Type = org.apache.arrow.flatbuf.Type;
+
+/**
+ * Minimal shape required of the views that back a SimpleVector:
+ * a slice(start, end) method and a toString() method.
+ */
+interface ArrayView {
+ slice(start: number, end: number) : ArrayView
+ toString() : string
+}
+
+/**
+ * Base class of every column vector.
+ * Holds the field it was built from, the field's name, and the length and
+ * null count taken from the record batch node when data is loaded.
+ */
+export abstract class Vector {
+    field: any;
+    name: string;
+    length: number;
+    null_count: number;
+
+    constructor(field) {
+        this.field = field;
+        this.name = field.name();
+    }
+
+    /* Access datum at index i */
+    abstract get(i);
+    /* Return array representing data in the range [start, end) */
+    abstract slice(start: number, end: number);
+    /* Return array of child vectors, for container types */
+    abstract getChildVectors();
+
+    /**
+     * Use recordBatch fieldNodes and Buffers to construct this Vector
+     * bb: flatbuffers.ByteBuffer
+     * node: org.apache.arrow.flatbuf.FieldNode
+     * buffers: { offset: number, length: number }[]
+     *
+     * Only the `low` member of node.length() and node.nullCount() is read.
+     */
+    public loadData(bb, node, buffers) {
+        var rowCount = node.length();
+        this.length = rowCount.low;
+        var nullCount = node.nullCount();
+        this.null_count = nullCount.low;
+        this.loadBuffers(bb, node, buffers);
+    }
+
+    /* Subclass hook: build this vector's views from its own buffers */
+    protected abstract loadBuffers(bb, node, buffers);
+
+    /**
+     * Helper function for loading a VALIDITY buffer (for Nullable types)
+     * bb: flatbuffers.ByteBuffer
+     * buffer: org.apache.arrow.flatbuf.Buffer
+     * Returns a BitArray spanning buffer.length * 8 bits.
+     */
+    static loadValidityBuffer(bb, buffer) : BitArray {
+        var bytes = bb.bytes_;
+        var start = bytes.byteOffset + buffer.offset;
+        return new BitArray(bytes.buffer, start, buffer.length * 8);
+    }
+
+    /**
+     * Helper function for loading an OFFSET buffer
+     * bb: flatbuffers.ByteBuffer
+     * buffer: org.apache.arrow.flatbuf.Buffer
+     * Returns an Int32Array view with buffer.length / 4 entries.
+     */
+    static loadOffsetBuffer(bb, buffer) : Int32Array {
+        var bytes = bb.bytes_;
+        var start = bytes.byteOffset + buffer.offset;
+        var entryCount = buffer.length / Int32Array.BYTES_PER_ELEMENT;
+        return new Int32Array(bytes.buffer, start, entryCount);
+    }
+
+}
+
+/**
+ * Vector backed by a single typed-array style view over the batch bytes.
+ * T is the view type; the constructor receives the matching view
+ * constructor so the view can be created once the buffers are known.
+ */
+class SimpleVector<T extends ArrayView> extends Vector {
+    protected dataView: T;
+    private TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number };
+
+    constructor (field, TypedArray: { new(buffer: any, offset: number, length: number): T, BYTES_PER_ELEMENT: number }) {
+        super(field);
+        this.TypedArray = TypedArray;
+    }
+
+    /* A simple vector has no children */
+    getChildVectors() {
+        return [];
+    }
+
+    /* Element i of the data view */
+    get(i) {
+        return this.dataView[i];
+    }
+
+    /* The first (and only expected) buffer is the data buffer */
+    loadBuffers(bb, node, buffers) {
+        var dataBuffer = buffers[0];
+        this.loadDataBuffer(bb, dataBuffer);
+    }
+
+    /**
+     * Create the data view over the bytes described by buffer.
+     * bb: flatbuffers.ByteBuffer
+     * buffer: org.apache.arrow.flatbuf.Buffer
+     */
+    protected loadDataBuffer(bb, buffer) {
+        var ViewType = this.TypedArray;
+        var bytes = bb.bytes_;
+        var start = bytes.byteOffset + buffer.offset;
+        var elementCount = buffer.length / ViewType.BYTES_PER_ELEMENT;
+        this.dataView = new ViewType(bytes.buffer, start, elementCount);
+    }
+
+    /* Expose the underlying view */
+    getDataView() {
+        return this.dataView;
+    }
+
+    toString() {
+        return this.dataView.toString();
+    }
+
+    /* Delegate to the view's own slice(start, end) */
+    slice(start, end) {
+        return this.dataView.slice(start, end);
+    }
+}
+
+/**
+ * SimpleVector with a validity bitmap: buffers[0] is the validity buffer
+ * and buffers[1] is the data buffer.
+ */
+class NullableSimpleVector<T extends ArrayView> extends SimpleVector<T> {
+
+    protected validityView: BitArray;
+
+    /* Element i, or null when its validity bit is not set */
+    get(i: number) {
+        return this.validityView.get(i) ? this.dataView[i] : null;
+    }
+
+    loadBuffers(bb, node, buffers) {
+        var validityBuffer = buffers[0];
+        var dataBuffer = buffers[1];
+        this.validityView = Vector.loadValidityBuffer(bb, validityBuffer);
+        this.loadDataBuffer(bb, dataBuffer);
+    }
+
+    /* Expose the validity bitmap */
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+// Non-nullable fixed-width numeric vectors. Each one only binds SimpleVector
+// to the typed array matching its element type. Signed integer vectors must
+// use the signed typed arrays (Int8Array/Int16Array/Int32Array); reading them
+// through the unsigned ones would turn negative values into large positives.
+class Uint8Vector extends SimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); } }
+class Uint16Vector extends SimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); } }
+class Uint32Vector extends SimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); } }
+class Int8Vector extends SimpleVector<Int8Array> { constructor(field) { super(field, Int8Array); } }
+class Int16Vector extends SimpleVector<Int16Array> { constructor(field) { super(field, Int16Array); } }
+class Int32Vector extends SimpleVector<Int32Array> { constructor(field) { super(field, Int32Array); } }
+class Float32Vector extends SimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); } }
+class Float64Vector extends SimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); } }
+
+// Nullable fixed-width numeric vectors. Each one only binds
+// NullableSimpleVector to the typed array matching its element type. Signed
+// integer vectors must use the signed typed arrays; reading them through the
+// unsigned ones would turn negative values into large positives.
+class NullableUint8Vector extends NullableSimpleVector<Uint8Array> { constructor(field) { super(field, Uint8Array); } }
+class NullableUint16Vector extends NullableSimpleVector<Uint16Array> { constructor(field) { super(field, Uint16Array); } }
+class NullableUint32Vector extends NullableSimpleVector<Uint32Array> { constructor(field) { super(field, Uint32Array); } }
+class NullableInt8Vector extends NullableSimpleVector<Int8Array> { constructor(field) { super(field, Int8Array); } }
+class NullableInt16Vector extends NullableSimpleVector<Int16Array> { constructor(field) { super(field, Int16Array); } }
+class NullableInt32Vector extends NullableSimpleVector<Int32Array> { constructor(field) { super(field, Int32Array); } }
+class NullableFloat32Vector extends NullableSimpleVector<Float32Array> { constructor(field) { super(field, Float32Array); } }
+class NullableFloat64Vector extends NullableSimpleVector<Float64Array> { constructor(field) { super(field, Float64Array); } }
+
+/**
+ * Non-nullable unsigned 64-bit integer vector.
+ * The data is viewed as 32-bit words; value i occupies words 2*i and 2*i+1.
+ */
+class Uint64Vector extends SimpleVector<Uint32Array> {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    /* Value i as a { low, high } pair of 32-bit words */
+    get(i: number) {
+        var lowWord = i * 2;
+        var words = this.dataView;
+        return { low: words[lowWord], high: words[lowWord + 1] };
+    }
+}
+
+/**
+ * Nullable unsigned 64-bit integer vector.
+ * The data is viewed as 32-bit words; value i occupies words 2*i and 2*i+1.
+ */
+class NullableUint64Vector extends NullableSimpleVector<Uint32Array> {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    /* Value i as a { low, high } pair, or null when its validity bit is not set */
+    get(i: number) {
+        if (!this.validityView.get(i)) {
+            return null;
+        }
+        var lowWord = i * 2;
+        return { low: this.dataView[lowWord], high: this.dataView[lowWord + 1] };
+    }
+}
+
+/**
+ * Non-nullable signed 64-bit integer vector.
+ * Values are exposed as { low, high } pairs of 32-bit words read through a
+ * Uint32Array view, so both halves come back as unsigned numbers.
+ *
+ * Extends SimpleVector (like Uint64Vector and the other non-nullable
+ * vectors) so that buffers[0] is read as the data buffer. The layout with a
+ * leading validity buffer is handled by NullableInt64Vector.
+ */
+class Int64Vector extends SimpleVector<Uint32Array> {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    /* Value i as a { low, high } pair of 32-bit words */
+    get(i: number) {
+        return { low: this.dataView[i * 2], high: this.dataView[(i * 2) + 1] };
+    }
+}
+
+/**
+ * Nullable signed 64-bit integer vector.
+ * The data is viewed as 32-bit words through a Uint32Array; value i
+ * occupies words 2*i and 2*i+1.
+ */
+class NullableInt64Vector extends NullableSimpleVector<Uint32Array> {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    /* Value i as a { low, high } pair, or null when its validity bit is not set */
+    get(i: number) {
+        if (!this.validityView.get(i)) {
+            return null;
+        }
+        var lowWord = i * 2;
+        return { low: this.dataView[lowWord], high: this.dataView[lowWord + 1] };
+    }
+}
+
+/**
+ * Non-nullable date vector.
+ * Each value is a 64-bit integer stored as two 32-bit words (low word
+ * first) and is passed to the Date constructor as a single number.
+ */
+class DateVector extends SimpleVector<Uint32Array> {
+    constructor(field) {
+        super(field, Uint32Array);
+    }
+
+    /* Date built from the 64-bit value at index i */
+    get (i) {
+        var low = super.get(2 * i);
+        // The high word carries the sign of the 64-bit value. `| 0`
+        // reinterprets the unsigned word as a signed 32-bit integer so that
+        // negative timestamps (dates before 1970) are reconstructed
+        // correctly instead of becoming huge positive numbers.
+        var high = super.get(2 * i + 1) | 0;
+        return new Date(high * Math.pow(2, 32) + low);
+    }
+}
+
+/**
+ * DateVector with a validity bitmap: buffers[0] is the validity buffer and
+ * buffers[1] is the data buffer.
+ */
+class NullableDateVector extends DateVector {
+    private validityView: BitArray;
+
+    loadBuffers(bb, node, buffers) {
+        var validityBuffer = buffers[0];
+        var dataBuffer = buffers[1];
+        this.validityView = Vector.loadValidityBuffer(bb, validityBuffer);
+        this.loadDataBuffer(bb, dataBuffer);
+    }
+
+    /* Date at index i, or null when its validity bit is not set */
+    get (i) {
+        if (!this.validityView.get(i)) {
+            return null;
+        }
+        return super.get(i);
+    }
+
+    /* Expose the validity bitmap */
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+/**
+ * Non-nullable string vector.
+ * buffers[0] holds Int32 offsets and buffers[1] the raw bytes; string i is
+ * decoded from the bytes in [offsets[i], offsets[i + 1]).
+ */
+class Utf8Vector extends SimpleVector<Uint8Array> {
+    protected offsetView: Int32Array;
+    static decoder: TextDecoder = new TextDecoder('utf8');
+
+    constructor(field) {
+        super(field, Uint8Array);
+    }
+
+    loadBuffers(bb, node, buffers) {
+        var offsetBuffer = buffers[0];
+        var dataBuffer = buffers[1];
+        this.offsetView = Vector.loadOffsetBuffer(bb, offsetBuffer);
+        this.loadDataBuffer(bb, dataBuffer);
+    }
+
+    /* Decode string i */
+    get(i) {
+        var begin = this.offsetView[i];
+        var end = this.offsetView[i + 1];
+        var bytes = this.dataView.slice(begin, end);
+        return Utf8Vector.decoder.decode(bytes);
+    }
+
+    /* Decoded strings for indices in [start, end) */
+    slice(start: number, end: number) {
+        var strings: string[] = [];
+        var index: number = start;
+        while (index < end) {
+            strings.push(this.get(index));
+            index += 1;
+        }
+        return strings;
+    }
+
+    /* Expose the offsets view */
+    getOffsetView() {
+        return this.offsetView;
+    }
+}
+
+/**
+ * Utf8Vector with a validity bitmap: buffers are read in the order
+ * validity, offsets, data.
+ */
+class NullableUtf8Vector extends Utf8Vector {
+    private validityView: BitArray;
+
+    loadBuffers(bb, node, buffers) {
+        var validityBuffer = buffers[0];
+        var offsetBuffer = buffers[1];
+        var dataBuffer = buffers[2];
+        this.validityView = Vector.loadValidityBuffer(bb, validityBuffer);
+        this.offsetView = Vector.loadOffsetBuffer(bb, offsetBuffer);
+        this.loadDataBuffer(bb, dataBuffer);
+    }
+
+    /* String i, or null when its validity bit is not set */
+    get(i) {
+        if (!this.validityView.get(i)) {
+            return null;
+        }
+        return super.get(i);
+    }
+
+    /* Expose the validity bitmap */
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+// Nested Types
+/**
+ * Non-nullable variable-length list vector.
+ * The inherited Uint32 data view holds offsets into a child data vector;
+ * list i is the child's slice [offsets[i], offsets[i + 1]).
+ */
+class ListVector extends Uint32Vector {
+    private dataVector: Vector;
+
+    constructor(field, dataVector: Vector) {
+        super(field);
+        this.dataVector = dataVector;
+    }
+
+    /* The single child is the vector holding the list elements */
+    getChildVectors() {
+        return [this.dataVector];
+    }
+
+    loadBuffers(bb, node, buffers) {
+        super.loadBuffers(bb, node, buffers);
+        // NOTE(review): length was set from node.length() by loadData and is
+        // reduced by one here; confirm this matches what writers put in the node.
+        this.length = this.length - 1;
+    }
+
+    /* Elements of list i, taken from the child vector */
+    get(i) {
+        var begin = super.get(i);
+        if (begin === null) {
+            return null;
+        }
+        var end = super.get(i + 1);
+        return this.dataVector.slice(begin, end);
+    }
+
+    toString() {
+        return "length: " + (this.length);
+    }
+
+    /* Lists for indices in [start, end) */
+    slice(start: number, end: number) {
+        var lists = [];
+        var index = start;
+        while (index < end) {
+            lists.push(this.get(index));
+            index += 1;
+        }
+        return lists;
+    }
+}
+
+/**
+ * ListVector with a validity bitmap: buffers[0] is the validity buffer and
+ * buffers[1] the offsets buffer.
+ */
+class NullableListVector extends ListVector {
+    private validityView: BitArray;
+
+    loadBuffers(bb, node, buffers) {
+        var validityBuffer = buffers[0];
+        var offsetBuffer = buffers[1];
+        this.validityView = Vector.loadValidityBuffer(bb, validityBuffer);
+        this.loadDataBuffer(bb, offsetBuffer);
+        // Same adjustment as ListVector.loadBuffers applies to length.
+        this.length = this.length - 1;
+    }
+
+    /* List i, or null when its validity bit is not set */
+    get(i) {
+        if (!this.validityView.get(i)) {
+            return null;
+        }
+        return super.get(i);
+    }
+
+    /* Expose the validity bitmap */
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+/**
+ * Non-nullable list vector whose lists all have the same number of
+ * elements; list i is the child's slice [i * size, (i + 1) * size).
+ */
+class FixedSizeListVector extends Vector {
+    private size: number
+    private dataVector: Vector;
+
+    constructor(field, size: number, dataVector: Vector) {
+        super(field);
+        this.size = size;
+        this.dataVector = dataVector;
+    }
+
+    /* The single child is the vector holding the list elements */
+    getChildVectors() {
+        return [this.dataVector];
+    }
+
+    loadBuffers(bb, node, buffers) {
+        // no buffers to load
+    }
+
+    /* Elements of list i, taken from the child vector */
+    get(i: number) {
+        var begin = i * this.size;
+        var end = (i + 1) * this.size;
+        return this.dataVector.slice(begin, end);
+    }
+
+    /* Lists for indices in [start, end) */
+    slice(start : number, end : number) {
+        var lists = [];
+        var index = start;
+        while (index < end) {
+            lists.push(this.get(index));
+            index += 1;
+        }
+        return lists;
+    }
+
+    /* Number of elements in each list */
+    getListSize() {
+        return this.size;
+    }
+}
+
+/**
+ * FixedSizeListVector with a validity bitmap loaded from buffers[0].
+ */
+class NullableFixedSizeListVector extends FixedSizeListVector {
+    private validityView: BitArray;
+
+    loadBuffers(bb, node, buffers) {
+        var validityBuffer = buffers[0];
+        this.validityView = Vector.loadValidityBuffer(bb, validityBuffer);
+    }
+
+    /* List i, or null when its validity bit is not set */
+    get(i: number) {
+        if (!this.validityView.get(i)) {
+            return null;
+        }
+        return super.get(i);
+    }
+
+    /* Expose the validity bitmap */
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+/**
+ * Struct vector: one child vector per struct member plus a validity bitmap
+ * loaded from buffers[0]. Row i is the array of each child's value at i.
+ */
+class StructVector extends Vector {
+    private validityView: BitArray;
+    private vectors: Vector[];
+
+    constructor(field, vectors: Vector[]) {
+        super(field);
+        this.vectors = vectors;
+    }
+
+    /* The member vectors, in the order given to the constructor */
+    getChildVectors() {
+        return this.vectors;
+    }
+
+    loadBuffers(bb, node, buffers) {
+        var validityBuffer = buffers[0];
+        this.validityView = Vector.loadValidityBuffer(bb, validityBuffer);
+    }
+
+    /* Row i as an array of member values, or null when its validity bit is not set */
+    get(i : number) {
+        if (!this.validityView.get(i)) {
+            return null;
+        }
+        return this.vectors.map((member: Vector) => member.get(i));
+    }
+
+    /* Rows for indices in [start, end) */
+    slice(start : number, end : number) {
+        var rows = [];
+        var index = start;
+        while (index < end) {
+            rows.push(this.get(index));
+            index += 1;
+        }
+        return rows;
+    }
+
+    /* Expose the validity bitmap */
+    getValidityVector() {
+        return this.validityView;
+    }
+}
+
+/**
+ * Dictionary-encoded vector: an index vector whose values are looked up in
+ * a separate dictionary vector.
+ */
+class DictionaryVector extends Vector {
+
+    private indices: Vector;
+    private dictionary: Vector;
+
+    constructor (field, indices: Vector, dictionary: Vector) {
+        super(field);
+        this.indices = indices;
+        this.dictionary = dictionary;
+    }
+
+    /* Decoded value at index i, or null when the index itself is null/undefined */
+    get(i) {
+        var encoded = this.indices.get(i);
+        if (encoded != null) {
+            return this.dictionary.get(encoded);
+        }
+        return null;
+    }
+
+    /** Get the dictionary encoded value */
+    public getEncoded(i) {
+        return this.indices.get(i);
+    }
+
+    /* Slice of the index vector; values are returned still encoded */
+    slice(start, end) {
+        return this.indices.slice(start, end); // TODO decode
+    }
+
+    /* Children are those of the index vector */
+    getChildVectors() {
+        return this.indices.getChildVectors();
+    }
+
+    /* All buffers of this vector belong to the index vector */
+    loadBuffers(bb, node, buffers) {
+        this.indices.loadData(bb, node, buffers);
+    }
+
+    /** Get the index (encoded) vector */
+    public getIndexVector() {
+        return this.indices;
+    }
+
+    /** Get the dictionary vector */
+    public getDictionaryVector() {
+        return this.dictionary;
+    }
+
+    toString() {
+        return this.indices.toString();
+    }
+}
+
+/**
+ * Build the (still empty) Vector matching a schema field.
+ * field: flatbuffers Field; its dictionary(), nullable(), typeType(),
+ *        type(...) and children(...) accessors are consulted
+ * dictionaries: lookup from dictionary id (as a string) to dictionary vector
+ * Returns a new Vector; container types recursively create their children.
+ * Throws a string for types or floating point precisions not handled here.
+ */
+export function vectorFromField(field, dictionaries) : Vector {
+    var dictionary = field.dictionary();
+    var nullable = field.nullable();
+
+    if (dictionary != null) {
+        // determine arrow type - default is signed 32 bit int
+        var indexType = dictionary.indexType();
+        var bitWidth = 32;
+        var signed = true;
+        if (indexType != null) {
+            bitWidth = indexType.bitWidth();
+            signed = indexType.isSigned();
+        }
+        var indices = _createIntVector(field, bitWidth, signed, nullable);
+        var dictionaryKey = dictionary.id().toFloat64().toString();
+        return new DictionaryVector(field, indices, dictionaries[dictionaryKey]);
+    }
+
+    var typeType = field.typeType();
+
+    if (typeType === Type.List) {
+        var listData = vectorFromField(field.children(0), dictionaries);
+        if (nullable) {
+            return new NullableListVector(field, listData);
+        }
+        return new ListVector(field, listData);
+    }
+
+    if (typeType === Type.FixedSizeList) {
+        var fixedData = vectorFromField(field.children(0), dictionaries);
+        var size = field.type(new org.apache.arrow.flatbuf.FixedSizeList()).listSize();
+        return nullable
+            ? new NullableFixedSizeListVector(field, size, fixedData)
+            : new FixedSizeListVector(field, size, fixedData);
+    }
+
+    if (typeType === Type.Struct_) {
+        var members : Vector[] = [];
+        for (var m : number = 0; m < field.childrenLength(); m += 1) {
+            members.push(vectorFromField(field.children(m), dictionaries));
+        }
+        return new StructVector(field, members);
+    }
+
+    if (typeType === Type.Int) {
+        var intType = field.type(new org.apache.arrow.flatbuf.Int());
+        return _createIntVector(field, intType.bitWidth(), intType.isSigned(), nullable);
+    }
+
+    if (typeType === Type.FloatingPoint) {
+        var precision = field.type(new org.apache.arrow.flatbuf.FloatingPoint()).precision();
+        if (precision == org.apache.arrow.flatbuf.Precision.SINGLE) {
+            return nullable ? new NullableFloat32Vector(field) : new Float32Vector(field);
+        }
+        if (precision == org.apache.arrow.flatbuf.Precision.DOUBLE) {
+            return nullable ? new NullableFloat64Vector(field) : new Float64Vector(field);
+        }
+        throw "Unimplemented FloatingPoint precision " + precision;
+    }
+
+    if (typeType === Type.Utf8) {
+        return nullable ? new NullableUtf8Vector(field) : new Utf8Vector(field);
+    }
+
+    if (typeType === Type.Date) {
+        return nullable ? new NullableDateVector(field) : new DateVector(field);
+    }
+
+    throw "Unimplemented type " + typeType;
+}
+
+/**
+ * Create the integer vector for a given width, signedness and nullability.
+ * field: field passed on to the vector constructor
+ * bitWidth: 8, 16, 32 or 64
+ * signed: truthy for the Int* classes, falsy for the Uint* classes
+ * nullable: truthy for the Nullable* variant
+ * Throws a string for any other bit width.
+ */
+function _createIntVector(field, bitWidth, signed, nullable) {
+    if (bitWidth == 8) {
+        if (signed) {
+            return nullable ? new NullableInt8Vector(field) : new Int8Vector(field);
+        }
+        return nullable ? new NullableUint8Vector(field) : new Uint8Vector(field);
+    }
+    if (bitWidth == 16) {
+        if (signed) {
+            return nullable ? new NullableInt16Vector(field) : new Int16Vector(field);
+        }
+        return nullable ? new NullableUint16Vector(field) : new Uint16Vector(field);
+    }
+    if (bitWidth == 32) {
+        if (signed) {
+            return nullable ? new NullableInt32Vector(field) : new Int32Vector(field);
+        }
+        return nullable ? new NullableUint32Vector(field) : new Uint32Vector(field);
+    }
+    if (bitWidth == 64) {
+        if (signed) {
+            return nullable ? new NullableInt64Vector(field) : new Int64Vector(field);
+        }
+        return nullable ? new NullableUint64Vector(field) : new Uint64Vector(field);
+    }
+    throw "Unimplemented Int bit width " + bitWidth;
+}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2d6453b2/js/tsconfig.json
----------------------------------------------------------------------
diff --git a/js/tsconfig.json b/js/tsconfig.json
index 89c31ef..f2ad0e8 100644
--- a/js/tsconfig.json
+++ b/js/tsconfig.json
@@ -1,14 +1,14 @@
{
"compilerOptions": {
- "outDir": "./dist/",
- "allowJs": true,
- "target": "es5",
"module": "commonjs",
- "moduleResolution": "node"
+ "target": "es5",
+ "lib": ["es2015", "dom"],
+ "outDir": "lib",
+ "moduleResolution": "node",
+ "sourceMap": true,
+ "declaration": true
},
"include": [
- "typings/index.d.ts",
- "lib/*.js",
- "lib/*.ts"
+ "src/*.ts"
]
}