You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by do...@apache.org on 2023/09/15 08:09:51 UTC
[arrow] branch main updated: GH-34567: [JS] Improve build and do not generate `bin/bin` directory (#36607)
This is an automated email from the ASF dual-hosted git repository.
domoritz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new a446ff71b8 GH-34567: [JS] Improve build and do not generate `bin/bin` directory (#36607)
a446ff71b8 is described below
commit a446ff71b87880d399a204b767ee493cff573d15
Author: Abe Tomoaki <ab...@enzou.tokyo>
AuthorDate: Fri Sep 15 17:09:45 2023 +0900
GH-34567: [JS] Improve build and do not generate `bin/bin` directory (#36607)
### Rationale for this change
The `bin/bin` directory is unnecessary and should not be generated.
### What changes are included in this PR?
* Add `src/bin/*.ts` to the `exclude` setting in `tsconfig.base.json`
* Correctly set up the `bin` output directory
### Are these changes tested?
The following files are no longer generated:
```
targets/apache-arrow/bin/bin/arrow2csv.js
targets/apache-arrow/bin/bin/arrow2csv.js.map
targets/apache-arrow/bin/bin/arrow2csv.mjs
targets/apache-arrow/bin/src/bin/arrow2csv.ts
targets/es2015/cjs/bin/bin/arrow2csv.js
targets/es2015/cjs/bin/bin/arrow2csv.js.map
targets/es2015/cjs/bin/src/bin/arrow2csv.ts
targets/es2015/esm/bin/bin/arrow2csv.js
targets/es2015/esm/bin/bin/arrow2csv.js.map
targets/es2015/esm/bin/src/bin/arrow2csv.ts
targets/es2015/umd/bin/bin/arrow2csv.js
targets/es2015/umd/bin/bin/arrow2csv.js.map
targets/es2015/umd/bin/src/bin/arrow2csv.ts
targets/es5/cjs/bin/bin/arrow2csv.js
targets/es5/cjs/bin/bin/arrow2csv.js.map
targets/es5/cjs/bin/src/bin/arrow2csv.ts
targets/es5/esm/bin/bin/arrow2csv.js
targets/es5/esm/bin/bin/arrow2csv.js.map
targets/es5/esm/bin/src/bin/arrow2csv.ts
targets/es5/umd/bin/bin/arrow2csv.js
targets/es5/umd/bin/bin/arrow2csv.js.map
targets/es5/umd/bin/src/bin/arrow2csv.ts
targets/esnext/cjs/bin/bin/arrow2csv.js
targets/esnext/cjs/bin/bin/arrow2csv.js.map
targets/esnext/cjs/bin/src/bin/arrow2csv.ts
targets/esnext/esm/bin/bin/arrow2csv.js
targets/esnext/esm/bin/bin/arrow2csv.js.map
targets/esnext/esm/bin/src/bin/arrow2csv.ts
targets/esnext/umd/bin/bin/arrow2csv.js
targets/esnext/umd/bin/bin/arrow2csv.js.map
targets/esnext/umd/bin/src/bin/arrow2csv.ts
```
### Are there any user-facing changes?
* Closes: #34567
Lead-authored-by: abetomo <ab...@enzou.tokyo>
Co-authored-by: ptaylor <pa...@me.com>
Signed-off-by: Dominik Moritz <do...@gmail.com>
---
js/.eslintrc.cjs | 2 +-
js/gulp/arrow-task.js | 28 +++++++++++++++++++---------
js/gulp/typescript-task.js | 26 ++++++++++++++++++++------
js/gulpfile.js | 4 ++++
js/src/Arrow.ts | 2 ++
js/src/bin/arrow2csv.ts | 16 ++++++++--------
js/tsconfig/tsconfig.base.json | 2 +-
7 files changed, 55 insertions(+), 25 deletions(-)
diff --git a/js/.eslintrc.cjs b/js/.eslintrc.cjs
index b629b86219..8a36516eec 100644
--- a/js/.eslintrc.cjs
+++ b/js/.eslintrc.cjs
@@ -23,7 +23,7 @@ module.exports = {
},
parser: "@typescript-eslint/parser",
parserOptions: {
- project: "tsconfig.json",
+ project: ["tsconfig.json", "tsconfig/tsconfig.bin.cjs.json"],
sourceType: "module",
ecmaVersion: 2020,
},
diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js
index 411a817ddc..2de20947dc 100644
--- a/js/gulp/arrow-task.js
+++ b/js/gulp/arrow-task.js
@@ -15,19 +15,18 @@
// specific language governing permissions and limitations
// under the License.
-import { targetDir, observableFromStreams } from './util.js';
+import { mainExport, targetDir, observableFromStreams } from './util.js';
-import { deleteAsync as del } from 'del';
import gulp from 'gulp';
+import path from 'path';
import { mkdirp } from 'mkdirp';
+import * as fs from 'fs/promises';
import gulpRename from 'gulp-rename';
import gulpReplace from 'gulp-replace';
import { memoizeTask } from './memoize-task.js';
import { ReplaySubject, forkJoin as ObservableForkJoin } from 'rxjs';
import { share } from 'rxjs/operators';
-import util from 'util';
-import stream from 'stream';
-const pipeline = util.promisify(stream.pipeline);
+import { pipeline } from 'stream/promises';
export const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) {
const out = targetDir(target);
@@ -54,9 +53,20 @@ export const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target
}))({});
export const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, format) {
+ const umd = targetDir(`es5`, `umd`);
const out = targetDir(target, format);
- await mkdirp(out);
- await pipeline(gulp.src(`src/**/*`), gulp.dest(out));
- await del(`${out}/**/*.js`);
-}))({});
+ const arrowUMD = path.join(umd, `${mainExport}.js`);
+ const arrow2csvUMD = path.join(umd, `bin`, `arrow2csv.js`);
+
+ await mkdirp(path.join(out, 'bin'));
+ await Promise.all([
+ pipeline(gulp.src(`src/**/*`), gulp.dest(out)),
+ pipeline(
+ gulp.src([arrowUMD, arrow2csvUMD]),
+ gulpReplace(`../${mainExport}.js`, `./${mainExport}.js`),
+ gulp.dest(path.join(out, 'bin'))
+ ),
+ fs.writeFile(path.join(out, 'bin', 'package.json'), '{"type": "commonjs"}')
+ ]);
+}))({});
diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js
index 0219219232..31769e3b1b 100644
--- a/js/gulp/typescript-task.js
+++ b/js/gulp/typescript-task.js
@@ -19,12 +19,13 @@ import { targetDir, tsconfigName, observableFromStreams, shouldRunInChildProcess
import gulp from 'gulp';
import path from 'path';
-import ts from 'gulp-typescript';
import tsc from 'typescript';
+import ts from 'gulp-typescript';
+import * as fs from 'fs/promises';
import sourcemaps from 'gulp-sourcemaps';
import { memoizeTask } from './memoize-task.js';
-import { ReplaySubject, forkJoin as ObservableForkJoin } from 'rxjs';
-import { mergeWith, takeLast, share } from 'rxjs/operators';
+import { ReplaySubject, forkJoin as ObservableForkJoin, defer as ObservableDefer } from 'rxjs';
+import { mergeWith, takeLast, share, concat } from 'rxjs/operators';
export const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) {
if (shouldRunInChildProcess(target, format)) {
@@ -44,10 +45,15 @@ export default typescriptTask;
export function compileBinFiles(target, format) {
const out = targetDir(target, format);
const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName('bin', 'cjs')}.json`);
- return compileTypescript(path.join(out, 'bin'), tsconfigPath, { target });
+ const tsconfigOverrides = format === 'esm' ? { target, module: 'ES2015' } : { target };
+ return compileTypescript(out, tsconfigPath, tsconfigOverrides, false)
+ .pipe(takeLast(1))
+ .pipe(concat(ObservableDefer(() => {
+ return fs.chmod(path.join(out, 'bin', 'arrow2csv.js'), 0o755);
+ })));
}
-function compileTypescript(out, tsconfigPath, tsconfigOverrides) {
+function compileTypescript(out, tsconfigPath, tsconfigOverrides, writeSourcemaps = true) {
const tsProject = ts.createProject(tsconfigPath, { typescript: tsc, ...tsconfigOverrides });
const { stream: { js, dts } } = observableFromStreams(
tsProject.src(), sourcemaps.init(),
@@ -56,7 +62,15 @@ function compileTypescript(out, tsconfigPath, tsconfigOverrides) {
const writeSources = observableFromStreams(tsProject.src(), gulp.dest(path.join(out, 'src')));
const writeDTypes = observableFromStreams(dts, sourcemaps.write('./', { includeContent: false, sourceRoot: './src' }), gulp.dest(out));
const mapFile = tsProject.options.module === tsc.ModuleKind.ES2015 ? esmMapFile : cjsMapFile;
- const writeJS = observableFromStreams(js, sourcemaps.write('./', { mapFile, includeContent: false, sourceRoot: './src' }), gulp.dest(out));
+ const writeJSArgs = writeSourcemaps ? [
+ js,
+ sourcemaps.write('./', { mapFile, includeContent: false, sourceRoot: './src' }),
+ gulp.dest(out)
+ ] : [
+ js,
+ gulp.dest(out)
+ ];
+ const writeJS = observableFromStreams(...writeJSArgs);
return ObservableForkJoin([writeSources, writeDTypes, writeJS]);
}
diff --git a/js/gulpfile.js b/js/gulpfile.js
index 6544b987b7..bf84a4a9e1 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -54,6 +54,10 @@ knownTargets.forEach((target) => {
));
});
+gulp.task(`build:ts`, gulp.series(
+ `build:es5:umd`, `clean:ts`, `compile:ts`, `package:ts`
+));
+
// The main "apache-arrow" module builds the es2015/umd, es2015/cjs,
// es2015/esm, and esnext/umd targets, then copies and renames the
// compiled output into the apache-arrow folder
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index dc44e10b92..4a6394c266 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -99,6 +99,7 @@ import * as util_bit_ from './util/bit.js';
import * as util_math_ from './util/math.js';
import * as util_buffer_ from './util/buffer.js';
import * as util_vector_ from './util/vector.js';
+import * as util_pretty_ from './util/pretty.js';
import { compareSchemas, compareFields, compareTypes } from './visitor/typecomparator.js';
/** @ignore */
@@ -109,6 +110,7 @@ export const util = {
...util_math_,
...util_buffer_,
...util_vector_,
+ ...util_pretty_,
compareSchemas,
compareFields,
compareTypes,
diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
old mode 100644
new mode 100755
index eae7f5805c..39db8c1749
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -21,8 +21,7 @@
import * as fs from 'fs';
import * as stream from 'stream';
-import { valueToString } from '../util/pretty.js';
-import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node.js';
+import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue, util } from '../Arrow.js';
import commandLineUsage from 'command-line-usage';
import commandLineArgs from 'command-line-args';
@@ -58,9 +57,10 @@ type ToStringState = {
if (state.closed) { break; }
for await (reader of recordBatchReaders(source)) {
hasReaders = true;
- const transformToString = batchesToString(state, reader.schema);
+ const batches = stream.Readable.from(reader);
+ const toString = batchesToString(state, reader.schema);
await pipeTo(
- reader.pipe(transformToString),
+ batches.pipe(toString),
process.stdout, { end: false }
).catch(() => state.closed = true); // Handle EPIPE errors
}
@@ -129,7 +129,7 @@ function batchesToString(state: ToStringState, schema: Schema) {
let maxColWidths = [10];
const { hr, sep, metadata } = state;
- const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(val => valueToString(val));
+ const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(val => util.valueToString(val));
state.maxColWidths = header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length));
@@ -181,7 +181,7 @@ function batchesToString(state: ToStringState, schema: Schema) {
if (rowId % 350 === 0) {
this.push(`${formatRow(header, maxColWidths, sep)}\n`);
}
- this.push(`${formatRow([rowId++, ...row.toArray()].map(v => valueToString(v)), maxColWidths, sep)}\n`);
+ this.push(`${formatRow([rowId++, ...row.toArray()].map(v => util.valueToString(v)), maxColWidths, sep)}\n`);
}
}
cb();
@@ -202,7 +202,7 @@ function formatMetadataValue(value = '') {
try {
parsed = JSON.stringify(JSON.parse(value), null, 2);
} catch { parsed = value; }
- return valueToString(parsed).split('\n').join('\n ');
+ return util.valueToString(parsed).split('\n').join('\n ');
}
function formatMetadata(metadata: Map<string, string>) {
@@ -236,7 +236,7 @@ function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: nu
(val.length * elementWidth) // width of stringified 2^N-1
);
} else {
- maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, valueToString(val).length);
+ maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, util.valueToString(val).length);
}
++j;
}
diff --git a/js/tsconfig/tsconfig.base.json b/js/tsconfig/tsconfig.base.json
index fb4ecb38b5..0d7fefd909 100644
--- a/js/tsconfig/tsconfig.base.json
+++ b/js/tsconfig/tsconfig.base.json
@@ -1,5 +1,5 @@
{
- "exclude": ["../node_modules"],
+ "exclude": ["../node_modules", "../src/bin/*.ts"],
"include": ["../src/**/*.ts"],
"compileOnSave": false,
"compilerOptions": {