You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by se...@apache.org on 2022/05/02 13:52:56 UTC
[whimsy] branch master updated: Add basic interfaces to node Puppeteer
This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new b99a581b Add basic interfaces to node Puppeteer
b99a581b is described below
commit b99a581bd66dd0c45e92572a9e382a97d1c844a1
Author: Sebb <se...@apache.org>
AuthorDate: Mon May 2 14:52:50 2022 +0100
Add basic interfaces to node Puppeteer
---
tools/render-page.js | 18 ++++++++++++++++++
tools/scan-page.js | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+)
diff --git a/tools/render-page.js b/tools/render-page.js
new file mode 100755
index 00000000..1694daef
--- /dev/null
+++ b/tools/render-page.js
@@ -0,0 +1,18 @@
+#!/usr/bin/env node
+
+// @(#) render a page that uses JavaScript
+
+module.paths.push('/usr/local/lib/node_modules')
+
+const puppeteer = require('puppeteer');
+
+const target = process.argv[2] || 'http://apache.org/';
+
+(async () => {
+ const browser = await puppeteer.launch();
+ const page = await browser.newPage();
+ await page.goto(target);
+ let html = await page.content();
+ console.log(html)
+ await browser.close();
+})();
diff --git a/tools/scan-page.js b/tools/scan-page.js
new file mode 100755
index 00000000..90bd94c7
--- /dev/null
+++ b/tools/scan-page.js
@@ -0,0 +1,41 @@
+#!/usr/bin/env node
+
+// @(#) extract non-ASF links when loading a page
+
+module.paths.push('/usr/local/lib/node_modules')
+
+const puppeteer = require('puppeteer');
+
+const target = process.argv[2] || 'http://apache.org/';
+
+function isASFhost(host) {
+ return host == 'apache.org' || host.endsWith('.apache.org') || host.endsWith('.apachecon.com');
+}
+
+(async () => {
+ const browser = await puppeteer.launch();
+ const page = await browser.newPage();
+ await page.setRequestInterception(true);
+ page.on('request', (interceptedRequest) => {
+ // already handled?
+ if (interceptedRequest.isInterceptResolutionHandled()) return;
+
+ const url = interceptedRequest.url();
+ if (url == target) {
+ // must allow this through
+ interceptedRequest.continue();
+ } else {
+ let host = new URL(url).host
+ // don't visit non-ASF hosts
+ if (!isASFhost(host)) {
+ console.log(host);
+ interceptedRequest.abort();
+ } else {
+ // Need to visit at least an initial redirect
+ interceptedRequest.continue();
+ }
+ }
+ });
+ await page.goto(target);
+ await browser.close();
+})();