You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by se...@apache.org on 2022/05/02 13:52:56 UTC

[whimsy] branch master updated: Add basic interfaces to node Puppeteer

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git


The following commit(s) were added to refs/heads/master by this push:
     new b99a581b Add basic interfaces to node Puppeteer
b99a581b is described below

commit b99a581bd66dd0c45e92572a9e382a97d1c844a1
Author: Sebb <se...@apache.org>
AuthorDate: Mon May 2 14:52:50 2022 +0100

    Add basic interfaces to node Puppeteer
---
 tools/render-page.js | 18 ++++++++++++++++++
 tools/scan-page.js   | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/tools/render-page.js b/tools/render-page.js
new file mode 100755
index 00000000..1694daef
--- /dev/null
+++ b/tools/render-page.js
@@ -0,0 +1,18 @@
+#!/usr/bin/env node
+
+// @(#) render a page that uses JavaScript
+
+module.paths.push('/usr/local/lib/node_modules')
+
+const puppeteer = require('puppeteer');
+
+// Page to render: first command-line argument, or the ASF home page when the
+// argument is missing or empty
+const target = process.argv[2] || 'http://apache.org/';
+
+// Open the target in a Puppeteer-launched browser and print the HTML that
+// page.content() returns once navigation has completed.
+(async () => {
+  const browser = await puppeteer.launch();
+  try {
+    const page = await browser.newPage();
+    await page.goto(target);
+    const html = await page.content();
+    console.log(html);
+  } finally {
+    // Always shut the browser down, even when navigation fails, so that no
+    // browser process is left behind
+    await browser.close();
+  }
+})().catch((err) => {
+  // Report the failure and exit non-zero rather than leaving an unhandled
+  // promise rejection
+  console.error(err);
+  process.exitCode = 1;
+});
diff --git a/tools/scan-page.js b/tools/scan-page.js
new file mode 100755
index 00000000..90bd94c7
--- /dev/null
+++ b/tools/scan-page.js
@@ -0,0 +1,41 @@
+#!/usr/bin/env node
+
+// @(#) extract non-ASF links when loading a page
+
+module.paths.push('/usr/local/lib/node_modules')
+
+const puppeteer = require('puppeteer');
+
+// Page to scan: first command-line argument, or the ASF home page when the
+// argument is missing or empty
+const target = process.argv[2] || 'http://apache.org/';
+
+/**
+ * Tell whether a host belongs to one of the ASF domains.
+ *
+ * A host matches when it is exactly 'apache.org' or 'apachecon.com', or is a
+ * subdomain of either. A trailing ':port' (as found in URL.host for
+ * non-default ports) is ignored, and the comparison is case-insensitive.
+ *
+ * @param {string} host - host name, optionally followed by ':port'
+ * @returns {boolean} true for an ASF host, false otherwise
+ */
+function isASFhost(host) {
+    // Drop any ':port' suffix so only the name itself is compared
+    const name = host.replace(/:\d+$/, '').toLowerCase();
+    // Require an exact match or a '.'-separated suffix, so that look-alikes
+    // such as 'notapache.org' are not accepted
+    return ['apache.org', 'apachecon.com'].some(
+        (domain) => name === domain || name.endsWith(`.${domain}`));
+}
+
+// Load the target page with request interception switched on. Each request
+// to a host that isASFhost() rejects has its host printed to stdout (one
+// line per request) and is aborted; the target itself and requests to ASF
+// hosts are allowed through.
+(async () => {
+  // The browser reports request URLs in serialized form (the DevTools
+  // protocol documents them as excluding the fragment), so also compare
+  // against the target in that form. Otherwise a target typed as
+  // 'http://example.com' (no trailing slash) would never match and its
+  // initial request would be aborted.
+  const parsedTarget = new URL(target);
+  parsedTarget.hash = '';
+  const targetUrl = parsedTarget.href;
+
+  const browser = await puppeteer.launch();
+  try {
+    const page = await browser.newPage();
+    await page.setRequestInterception(true);
+    page.on('request', (interceptedRequest) => {
+      // already handled?
+      if (interceptedRequest.isInterceptResolutionHandled()) return;
+
+      const url = interceptedRequest.url();
+      if (url === target || url === targetUrl) {
+        // must allow this through
+        interceptedRequest.continue();
+        return;
+      }
+
+      const requestUrl = new URL(url);
+      // Test the bare hostname: URL.host also carries any non-default port,
+      // which would make an ASF host on such a port look non-ASF
+      if (isASFhost(requestUrl.hostname)) {
+        // Need to visit at least an initial redirect
+        interceptedRequest.continue();
+        return;
+      }
+
+      // don't visit non-ASF hosts
+      console.log(requestUrl.host);
+      interceptedRequest.abort();
+    });
+    await page.goto(target);
+  } finally {
+    // Always shut the browser down, even when navigation fails (for example
+    // because the main request was aborted above)
+    await browser.close();
+  }
+})().catch((err) => {
+  // Report the failure and exit non-zero rather than leaving an unhandled
+  // promise rejection
+  console.error(err);
+  process.exitCode = 1;
+});