You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by do...@apache.org on 2023/09/15 08:09:51 UTC

[arrow] branch main updated: GH-34567: [JS] Improve build and do not generate `bin/bin` directory (#36607)

This is an automated email from the ASF dual-hosted git repository.

domoritz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a446ff71b8 GH-34567: [JS] Improve build and do not generate `bin/bin` directory (#36607)
a446ff71b8 is described below

commit a446ff71b87880d399a204b767ee493cff573d15
Author: Abe Tomoaki <ab...@enzou.tokyo>
AuthorDate: Fri Sep 15 17:09:45 2023 +0900

    GH-34567: [JS] Improve build and do not generate `bin/bin` directory (#36607)
    
    
    
    ### Rationale for this change
    
    The `bin/bin` directory is unnecessary and should not be generated.
    
    ### What changes are included in this PR?
    
    * Add `../src/bin/*.ts` to the `exclude` setting in `tsconfig.base.json`
    * Correctly set up the `bin` output directory
    
    ### Are these changes tested?
    
    The following files are no longer generated.
    
    ```
    targets/apache-arrow/bin/bin/arrow2csv.js
    targets/apache-arrow/bin/bin/arrow2csv.js.map
    targets/apache-arrow/bin/bin/arrow2csv.mjs
    targets/apache-arrow/bin/src/bin/arrow2csv.ts
    targets/es2015/cjs/bin/bin/arrow2csv.js
    targets/es2015/cjs/bin/bin/arrow2csv.js.map
    targets/es2015/cjs/bin/src/bin/arrow2csv.ts
    targets/es2015/esm/bin/bin/arrow2csv.js
    targets/es2015/esm/bin/bin/arrow2csv.js.map
    targets/es2015/esm/bin/src/bin/arrow2csv.ts
    targets/es2015/umd/bin/bin/arrow2csv.js
    targets/es2015/umd/bin/bin/arrow2csv.js.map
    targets/es2015/umd/bin/src/bin/arrow2csv.ts
    targets/es5/cjs/bin/bin/arrow2csv.js
    targets/es5/cjs/bin/bin/arrow2csv.js.map
    targets/es5/cjs/bin/src/bin/arrow2csv.ts
    targets/es5/esm/bin/bin/arrow2csv.js
    targets/es5/esm/bin/bin/arrow2csv.js.map
    targets/es5/esm/bin/src/bin/arrow2csv.ts
    targets/es5/umd/bin/bin/arrow2csv.js
    targets/es5/umd/bin/bin/arrow2csv.js.map
    targets/es5/umd/bin/src/bin/arrow2csv.ts
    targets/esnext/cjs/bin/bin/arrow2csv.js
    targets/esnext/cjs/bin/bin/arrow2csv.js.map
    targets/esnext/cjs/bin/src/bin/arrow2csv.ts
    targets/esnext/esm/bin/bin/arrow2csv.js
    targets/esnext/esm/bin/bin/arrow2csv.js.map
    targets/esnext/esm/bin/src/bin/arrow2csv.ts
    targets/esnext/umd/bin/bin/arrow2csv.js
    targets/esnext/umd/bin/bin/arrow2csv.js.map
    targets/esnext/umd/bin/src/bin/arrow2csv.ts
    ```
    
    ### Are there any user-facing changes?
    
    * Closes: #34567
    
    Lead-authored-by: abetomo <ab...@enzou.tokyo>
    Co-authored-by: ptaylor <pa...@me.com>
    Signed-off-by: Dominik Moritz <do...@gmail.com>
---
 js/.eslintrc.cjs               |  2 +-
 js/gulp/arrow-task.js          | 28 +++++++++++++++++++---------
 js/gulp/typescript-task.js     | 26 ++++++++++++++++++++------
 js/gulpfile.js                 |  4 ++++
 js/src/Arrow.ts                |  2 ++
 js/src/bin/arrow2csv.ts        | 16 ++++++++--------
 js/tsconfig/tsconfig.base.json |  2 +-
 7 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/js/.eslintrc.cjs b/js/.eslintrc.cjs
index b629b86219..8a36516eec 100644
--- a/js/.eslintrc.cjs
+++ b/js/.eslintrc.cjs
@@ -23,7 +23,7 @@ module.exports = {
     },
     parser: "@typescript-eslint/parser",
     parserOptions: {
-        project: "tsconfig.json",
+        project: ["tsconfig.json", "tsconfig/tsconfig.bin.cjs.json"],
         sourceType: "module",
         ecmaVersion: 2020,
     },
diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js
index 411a817ddc..2de20947dc 100644
--- a/js/gulp/arrow-task.js
+++ b/js/gulp/arrow-task.js
@@ -15,19 +15,18 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { targetDir, observableFromStreams } from './util.js';
+import { mainExport, targetDir, observableFromStreams } from './util.js';
 
-import { deleteAsync as del } from 'del';
 import gulp from 'gulp';
+import path from 'path';
 import { mkdirp } from 'mkdirp';
+import * as fs from 'fs/promises';
 import gulpRename from 'gulp-rename';
 import gulpReplace from 'gulp-replace';
 import { memoizeTask } from './memoize-task.js';
 import { ReplaySubject, forkJoin as ObservableForkJoin } from 'rxjs';
 import { share } from 'rxjs/operators';
-import util from 'util';
-import stream from 'stream';
-const pipeline = util.promisify(stream.pipeline);
+import { pipeline } from 'stream/promises';
 
 export const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) {
     const out = targetDir(target);
@@ -54,9 +53,20 @@ export const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target
 }))({});
 
 export const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, format) {
+    const umd = targetDir(`es5`, `umd`);
     const out = targetDir(target, format);
-    await mkdirp(out);
-    await pipeline(gulp.src(`src/**/*`), gulp.dest(out));
-    await del(`${out}/**/*.js`);
-}))({});
+    const arrowUMD = path.join(umd, `${mainExport}.js`);
+    const arrow2csvUMD = path.join(umd, `bin`, `arrow2csv.js`);
+
+    await mkdirp(path.join(out, 'bin'));
 
+    await Promise.all([
+        pipeline(gulp.src(`src/**/*`), gulp.dest(out)),
+        pipeline(
+            gulp.src([arrowUMD, arrow2csvUMD]),
+            gulpReplace(`../${mainExport}.js`, `./${mainExport}.js`),
+            gulp.dest(path.join(out, 'bin'))
+        ),
+        fs.writeFile(path.join(out, 'bin', 'package.json'), '{"type": "commonjs"}')
+    ]);
+}))({});
diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js
index 0219219232..31769e3b1b 100644
--- a/js/gulp/typescript-task.js
+++ b/js/gulp/typescript-task.js
@@ -19,12 +19,13 @@ import { targetDir, tsconfigName, observableFromStreams, shouldRunInChildProcess
 
 import gulp from 'gulp';
 import path from 'path';
-import ts from 'gulp-typescript';
 import tsc from 'typescript';
+import ts from 'gulp-typescript';
+import * as fs from 'fs/promises';
 import sourcemaps from 'gulp-sourcemaps';
 import { memoizeTask } from './memoize-task.js';
-import { ReplaySubject, forkJoin as ObservableForkJoin } from 'rxjs';
-import { mergeWith, takeLast, share } from 'rxjs/operators';
+import { ReplaySubject, forkJoin as ObservableForkJoin, defer as ObservableDefer } from 'rxjs';
+import { mergeWith, takeLast, share, concat } from 'rxjs/operators';
 
 export const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) {
     if (shouldRunInChildProcess(target, format)) {
@@ -44,10 +45,15 @@ export default typescriptTask;
 export function compileBinFiles(target, format) {
     const out = targetDir(target, format);
     const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName('bin', 'cjs')}.json`);
-    return compileTypescript(path.join(out, 'bin'), tsconfigPath, { target });
+    const tsconfigOverrides = format === 'esm' ? { target, module: 'ES2015' } : { target };
+    return compileTypescript(out, tsconfigPath, tsconfigOverrides, false)
+      .pipe(takeLast(1))
+      .pipe(concat(ObservableDefer(() => {
+        return fs.chmod(path.join(out, 'bin', 'arrow2csv.js'), 0o755);
+      })));
 }
 
-function compileTypescript(out, tsconfigPath, tsconfigOverrides) {
+function compileTypescript(out, tsconfigPath, tsconfigOverrides, writeSourcemaps = true) {
     const tsProject = ts.createProject(tsconfigPath, { typescript: tsc, ...tsconfigOverrides });
     const { stream: { js, dts } } = observableFromStreams(
         tsProject.src(), sourcemaps.init(),
@@ -56,7 +62,15 @@ function compileTypescript(out, tsconfigPath, tsconfigOverrides) {
     const writeSources = observableFromStreams(tsProject.src(), gulp.dest(path.join(out, 'src')));
     const writeDTypes = observableFromStreams(dts, sourcemaps.write('./', { includeContent: false, sourceRoot: './src' }), gulp.dest(out));
     const mapFile = tsProject.options.module === tsc.ModuleKind.ES2015 ? esmMapFile : cjsMapFile;
-    const writeJS = observableFromStreams(js, sourcemaps.write('./', { mapFile, includeContent: false, sourceRoot: './src' }), gulp.dest(out));
+    const writeJSArgs = writeSourcemaps ? [
+        js,
+        sourcemaps.write('./', { mapFile, includeContent: false, sourceRoot: './src' }),
+        gulp.dest(out)
+      ] : [
+        js,
+        gulp.dest(out)
+      ];
+    const writeJS = observableFromStreams(...writeJSArgs);
     return ObservableForkJoin([writeSources, writeDTypes, writeJS]);
 }
 
diff --git a/js/gulpfile.js b/js/gulpfile.js
index 6544b987b7..bf84a4a9e1 100644
--- a/js/gulpfile.js
+++ b/js/gulpfile.js
@@ -54,6 +54,10 @@ knownTargets.forEach((target) => {
     ));
 });
 
+gulp.task(`build:ts`, gulp.series(
+    `build:es5:umd`, `clean:ts`, `compile:ts`, `package:ts`
+));
+
 // The main "apache-arrow" module builds the es2015/umd, es2015/cjs,
 // es2015/esm, and esnext/umd targets, then copies and renames the
 // compiled output into the apache-arrow folder
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index dc44e10b92..4a6394c266 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -99,6 +99,7 @@ import * as util_bit_ from './util/bit.js';
 import * as util_math_ from './util/math.js';
 import * as util_buffer_ from './util/buffer.js';
 import * as util_vector_ from './util/vector.js';
+import * as util_pretty_ from './util/pretty.js';
 import { compareSchemas, compareFields, compareTypes } from './visitor/typecomparator.js';
 
 /** @ignore */
@@ -109,6 +110,7 @@ export const util = {
     ...util_math_,
     ...util_buffer_,
     ...util_vector_,
+    ...util_pretty_,
     compareSchemas,
     compareFields,
     compareTypes,
diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
old mode 100644
new mode 100755
index eae7f5805c..39db8c1749
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -21,8 +21,7 @@
 
 import * as fs from 'fs';
 import * as stream from 'stream';
-import { valueToString } from '../util/pretty.js';
-import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node.js';
+import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue, util } from '../Arrow.js';
 
 import commandLineUsage from 'command-line-usage';
 import commandLineArgs from 'command-line-args';
@@ -58,9 +57,10 @@ type ToStringState = {
         if (state.closed) { break; }
         for await (reader of recordBatchReaders(source)) {
             hasReaders = true;
-            const transformToString = batchesToString(state, reader.schema);
+            const batches = stream.Readable.from(reader);
+            const toString = batchesToString(state, reader.schema);
             await pipeTo(
-                reader.pipe(transformToString),
+                batches.pipe(toString),
                 process.stdout, { end: false }
             ).catch(() => state.closed = true); // Handle EPIPE errors
         }
@@ -129,7 +129,7 @@ function batchesToString(state: ToStringState, schema: Schema) {
     let maxColWidths = [10];
     const { hr, sep, metadata } = state;
 
-    const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(val => valueToString(val));
+    const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(val => util.valueToString(val));
 
     state.maxColWidths = header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length));
 
@@ -181,7 +181,7 @@ function batchesToString(state: ToStringState, schema: Schema) {
                     if (rowId % 350 === 0) {
                         this.push(`${formatRow(header, maxColWidths, sep)}\n`);
                     }
-                    this.push(`${formatRow([rowId++, ...row.toArray()].map(v => valueToString(v)), maxColWidths, sep)}\n`);
+                    this.push(`${formatRow([rowId++, ...row.toArray()].map(v => util.valueToString(v)), maxColWidths, sep)}\n`);
                 }
             }
             cb();
@@ -202,7 +202,7 @@ function formatMetadataValue(value = '') {
     try {
         parsed = JSON.stringify(JSON.parse(value), null, 2);
     } catch { parsed = value; }
-    return valueToString(parsed).split('\n').join('\n  ');
+    return util.valueToString(parsed).split('\n').join('\n  ');
 }
 
 function formatMetadata(metadata: Map<string, string>) {
@@ -236,7 +236,7 @@ function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: nu
                     (val.length * elementWidth) // width of stringified 2^N-1
                 );
             } else {
-                maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, valueToString(val).length);
+                maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, util.valueToString(val).length);
             }
             ++j;
         }
diff --git a/js/tsconfig/tsconfig.base.json b/js/tsconfig/tsconfig.base.json
index fb4ecb38b5..0d7fefd909 100644
--- a/js/tsconfig/tsconfig.base.json
+++ b/js/tsconfig/tsconfig.base.json
@@ -1,5 +1,5 @@
 {
-  "exclude": ["../node_modules"],
+  "exclude": ["../node_modules", "../src/bin/*.ts"],
   "include": ["../src/**/*.ts"],
   "compileOnSave": false,
   "compilerOptions": {