You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@shindig.apache.org by ch...@apache.org on 2009/02/23 15:41:33 UTC

svn commit: r747034 - in /incubator/shindig/trunk/php/src/gadgets: render/GadgetHtmlRenderer.php rewrite/ContentRewriter.php rewrite/DomRewriter.php rewrite/GadgetRewriter.php rewrite/SanitizeRewriter.php servlet/GadgetRenderingServlet.php

Author: chabotc
Date: Mon Feb 23 14:41:31 2009
New Revision: 747034

URL: http://svn.apache.org/viewvc?rev=747034&view=rev
Log:
Initial version of the DOM tree based rewriting. The tree is generated using php's DOMDocument (libxml2), and classes can implement tag name based observers for the elements they want to process. The Content Rewriter is already functional but not complete yet, smarter style parsing still needs to be added (dealing with import etc statements), and once it's stable enough, the rewriting should also be added to the ProxyHandler. To enable the rewriting in the proxy handler the rewriter adds a gadget=url to the proxy url, so it can use that to get the rewrite spec

Added:
    incubator/shindig/trunk/php/src/gadgets/rewrite/ContentRewriter.php
    incubator/shindig/trunk/php/src/gadgets/rewrite/DomRewriter.php
    incubator/shindig/trunk/php/src/gadgets/rewrite/GadgetRewriter.php
    incubator/shindig/trunk/php/src/gadgets/rewrite/SanitizeRewriter.php
Modified:
    incubator/shindig/trunk/php/src/gadgets/render/GadgetHtmlRenderer.php
    incubator/shindig/trunk/php/src/gadgets/servlet/GadgetRenderingServlet.php

Modified: incubator/shindig/trunk/php/src/gadgets/render/GadgetHtmlRenderer.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/gadgets/render/GadgetHtmlRenderer.php?rev=747034&r1=747033&r2=747034&view=diff
==============================================================================
--- incubator/shindig/trunk/php/src/gadgets/render/GadgetHtmlRenderer.php (original)
+++ incubator/shindig/trunk/php/src/gadgets/render/GadgetHtmlRenderer.php Mon Feb 23 14:41:31 2009
@@ -66,11 +66,17 @@
     $content .= $this->appendPreloads($gadget);
     $content .= "</script>";
 
-    // Append the content from the view
-    $content .= $gadget->substitutions->substitute($view['content']);
+    // Append the content for the selected view
+    $viewContent = $gadget->substitutions->substitute($view['content']);
+
+    // Rewrite the content, the GadgetRewriter will check for the Content-Rewrite and Caja feature, if
+    // sanitization is required, plus look for any template expanding that needs to be done
+    $rewriter = new GadgetRewriter($this->context);
+    $content .= $rewriter->rewrite($viewContent, $gadget);
 
     // And add our runOnLoadHandlers() call
     $content .= "\n<script>gadgets.util.runOnLoadHandlers();</script></body>\n</html>";
+
     echo $content;
   }
 

Added: incubator/shindig/trunk/php/src/gadgets/rewrite/ContentRewriter.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/gadgets/rewrite/ContentRewriter.php?rev=747034&view=auto
==============================================================================
--- incubator/shindig/trunk/php/src/gadgets/rewrite/ContentRewriter.php (added)
+++ incubator/shindig/trunk/php/src/gadgets/rewrite/ContentRewriter.php Mon Feb 23 14:41:31 2009
@@ -0,0 +1,131 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+/**
+ * Implements the Content-Rewrite feature which points all image, css and script
+ * links at their proxied versions, which can be quite a latency improvement, and
+ * saves the gadget dev's server from melting down
+ */
+class ContentRewriter extends DomRewriter {
+  private $rewrite;
+  private $baseUrl;
+  private $defaultRewrite = array('include-url' => array('*'), 'exclude-url' => array(), 'refresh' => '86400');
+
+  public function __construct(GadgetContext $context, Gadget &$gadget) {
+    parent::__construct($context, $gadget);
+    // if no rewrite params are set in the gadget (read through isset() so a missing property raises no notice) but rewrite_by_default is on, use our default rules (rewrite all)
+    $specRewrite = isset($gadget->gadgetSpec->rewrite) ? $gadget->gadgetSpec->rewrite : null;
+    $this->rewrite = ($specRewrite === null && Config::get('rewrite_by_default')) ? $this->defaultRewrite : $specRewrite;
+    // the base url of the gadget (up to and including its last '/') is used for relative paths
+    $gadgetUrl = $this->context->getUrl();
+    $this->baseUrl = substr($gadgetUrl, 0, strrpos($gadgetUrl, '/') + 1);
+  }
+
+  /**
+   * Register our dom node observers
+   *
+   * @param GadgetRewriter $gadgetRewriter
+   */
+  public function register(GadgetRewriter &$gadgetRewriter) {
+    $gadgetRewriter->addObserver('img', $this, 'rewriteImage');
+    $gadgetRewriter->addObserver('style', $this, 'rewriteStyle');
+    $gadgetRewriter->addObserver('script', $this, 'rewriteScript');
+    $gadgetRewriter->addObserver('link', $this, 'rewriteStyleLink');
+  }
+
+  /**
+   * Produces the proxied version of a URL: a relative URL is first prefixed with the gadget's base
+   * url, then the refresh interval and the gadget url are appended as params for the proxy server
+   *
+   * @param string $url
+   */
+  private function getProxyUrl($url) {
+    // only a leading scheme makes a url absolute, a 'http://' further on (iaw: in its query string) does not
+    if (! preg_match('#^\s*https?://#i', $url)) {
+      $url = $this->baseUrl . $url;
+    }
+    $proxyUrl = Config::get('web_prefix') . '/gadgets/proxy?url=' . urlencode($url);
+    $proxyUrl .= '&refresh=' . $this->getRefreshInterval();
+    $proxyUrl .= '&gadget=' . urlencode($this->context->getUrl());
+    return $proxyUrl;
+  }
+
+  /** The numeric 'expires' rewrite param, else the numeric 'refresh' one (the key our default rules use), else '3600' */
+  private function getRefreshInterval() {
+    foreach (array('expires', 'refresh') as $key) {
+      if (isset($this->rewrite[$key]) && is_numeric($this->rewrite[$key])) {
+        return $this->rewrite[$key];
+      }
+    }
+    return '3600';
+  }
+
+  /**
+   * Checks the URL against the include-url and exclude-url params
+   *
+   * @param string $url
+   */
+  private function includedUrl($url) {
+    return $this->matchesAny($url, 'include-url') && ! $this->matchesAny($url, 'exclude-url');
+  }
+
+  /** True if rewrite param $param holds a '*' entry or an entry that is a substring of $url */
+  private function matchesAny($url, $param) {
+    if (isset($this->rewrite[$param])) {
+      foreach ($this->rewrite[$param] as $pattern) {
+        if ($pattern == '*' || strpos($url, $pattern) !== false) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /** Points attribute $attribute of $node at the proxy if it holds a url that passes includedUrl() */
+  private function rewriteAttribute(DOMElement $node, $attribute) {
+    if (($url = $node->getAttribute($attribute)) != null && $this->includedUrl($url)) {
+      $node->setAttribute($attribute, $this->getProxyUrl($url));
+    }
+  }
+
+  /** Rewrites the src attribute of an img tag */
+  public function rewriteImage(DOMElement &$node) {
+    $this->rewriteAttribute($node, 'src');
+  }
+
+  public function rewriteStyle(DOMElement &$node) {
+    // find import('foo') statements
+    // find and rewrite url('foo') statements (background, etc)
+  }
+
+  /** Rewrites the src attribute of a script tag */
+  public function rewriteScript(DOMElement &$node) {
+    // make sure not to rewrite our forcedJsLibs src tag, else things break
+    if (strpos($node->getAttribute('src'), '/gadgets/js') === false) {
+      $this->rewriteAttribute($node, 'src');
+    }
+  }
+
+  /** Rewrites the href attribute of a link tag */
+  public function rewriteStyleLink(DOMElement &$node) {
+    $this->rewriteAttribute($node, 'href');
+  }
+}

Added: incubator/shindig/trunk/php/src/gadgets/rewrite/DomRewriter.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/gadgets/rewrite/DomRewriter.php?rev=747034&view=auto
==============================================================================
--- incubator/shindig/trunk/php/src/gadgets/rewrite/DomRewriter.php (added)
+++ incubator/shindig/trunk/php/src/gadgets/rewrite/DomRewriter.php Mon Feb 23 14:41:31 2009
@@ -0,0 +1,42 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Abstract base class for DOM based rewriters. Implementations hook their tag
+ * name observers into the GadgetRewriter through their register() method.
+ */
+abstract class DomRewriter {
+  protected $context;
+  protected $gadget;
+
+  /** Keeps the gadget context and the gadget around for the implementing class */
+  public function __construct(GadgetContext $context, Gadget &$gadget) {
+    $this->gadget = $gadget;
+    $this->context = $context;
+  }
+
+  /**
+   * Registers this rewriter's element => function mappings with the GadgetRewriter.
+   * Always use lower case tag names when calling GadgetRewriter->addObserver
+   *
+   * @param GadgetRewriter $gadgetRewriter
+   */
+  abstract public function register(GadgetRewriter &$gadgetRewriter);
+}
\ No newline at end of file

Added: incubator/shindig/trunk/php/src/gadgets/rewrite/GadgetRewriter.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/gadgets/rewrite/GadgetRewriter.php?rev=747034&view=auto
==============================================================================
--- incubator/shindig/trunk/php/src/gadgets/rewrite/GadgetRewriter.php (added)
+++ incubator/shindig/trunk/php/src/gadgets/rewrite/GadgetRewriter.php Mon Feb 23 14:41:31 2009
@@ -0,0 +1,117 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * The rewriter meta class, it checks the various gadget and configuration
+ * settings, and calls the appropriate classes for the registered dom element
+ * listeners
+ */
+class GadgetRewriter {
+  private $context;
+  private $domObservers = array();
+
+  public function __construct(GadgetContext $context) {
+    $this->context = $context;
+  }
+
+  /**
+   * Does the actual rewrite option scanning and performs the dom parsing
+   *
+   * @param string $content
+   * @param Gadget $gadget
+   * @return string the rewritten content, or $content itself when there was nothing to do or it could not be parsed
+   */
+  public function rewrite($content, Gadget &$gadget) {
+    // Check to see if the gadget requested rewriting, or if rewriting is forced in the configuration
+    if (is_array($gadget->gadgetSpec->rewrite) || Config::get('rewrite_by_default')) {
+      require_once "src/gadgets/rewrite/ContentRewriter.php";
+      $contentRewriter = new ContentRewriter($this->context, $gadget);
+      $contentRewriter->register($this);
+    }
+
+    // Are we configured to sanitize certain views? (if so the config should be an array of view names to sanitize, iaw: array('profile', 'home'))
+    if (is_array(Config::get('sanitize_views'))) {
+      require_once "src/gadgets/rewrite/SanitizeRewriter.php";
+      $sanitizeRewriter = new SanitizeRewriter($this->context, $gadget);
+      $sanitizeRewriter->register($this);
+    }
+
+    // no observers registered or nothing to parse (loadHTML rejects an empty string), return the original content
+    if (! count($this->domObservers) || $content === null || $content === '') {
+      return $content;
+    }
+    // buffer libxml's parse errors instead of emitting warnings; nobody reads them, so clear them and restore the previous mode right after loading
+    $previousErrorMode = libxml_use_internal_errors(true);
+    $doc = new DOMDocument('1.0', 'utf-8');
+    $doc->preserveWhiteSpace = false;
+    $doc->formatOutput = false;
+    $doc->strictErrorChecking = false;
+    $doc->recover = false;
+    $loaded = $doc->loadHtml($content);
+    libxml_clear_errors();
+    libxml_use_internal_errors($previousErrorMode);
+    if (! $loaded) {
+      // parsing failed, return the unmodified content
+      return $content;
+    }
+
+    // find and parse all nodes in the dom document
+    $rootNodes = $doc->getElementsByTagName('*');
+    $this->parseNodes($rootNodes);
+
+    // DomDocument tries to make the document a valid html document, so added the html/body/head elements to it.. so lets strip them off before returning the content
+    $html = $doc->saveHTML();
+    $html = preg_replace('/^<!DOCTYPE.+?>/', '', str_replace(array('&amp;', '<head>', '</head>', '<html>', '</html>', '<body>', '</body>'), array('&', '', '', '', '', '', ''), $html));
+
+    // If the gadget specified the caja feature, cajole it
+    if (in_array('caja', $gadget->features)) {
+      //TODO : use the caja daemon to cajole the content (experimental patch is available and will be added soon)
+    }
+
+    return $html;
+  }
+
+  /**
+   * Stores a tag => method mapping; DomRewriter implementations call it as addObserver('img', $this, 'rewriteImage')
+   *
+   * @param string $tag
+   * @param object instance $class
+   * @param string $function
+   */
+  public function addObserver($tag, DomRewriter $class, $function) {
+    $this->domObservers[] = array('tag' => $tag, 'class' => $class, 'function' => $function);
+  }
+
+  /**
+   * Parses the DOMNodeList $nodes and calls the registered rewriting function on nodes
+   *
+   * @param DOMNodeList $nodes
+   */
+  private function parseNodes(DOMNodeList &$nodes) {
+    foreach ($nodes as $node) {
+      $tagName = strtolower($node->tagName);
+      foreach ($this->domObservers as $observer) {
+        if ($observer['tag'] == $tagName) {
+          $observer['class']->{$observer['function']}($node);
+        }
+      }
+    }
+  }
+}

Added: incubator/shindig/trunk/php/src/gadgets/rewrite/SanitizeRewriter.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/gadgets/rewrite/SanitizeRewriter.php?rev=747034&view=auto
==============================================================================
--- incubator/shindig/trunk/php/src/gadgets/rewrite/SanitizeRewriter.php (added)
+++ incubator/shindig/trunk/php/src/gadgets/rewrite/SanitizeRewriter.php Mon Feb 23 14:41:31 2009
@@ -0,0 +1,45 @@
+<?php
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+/**
+ * Content sanitizer, empties the script elements of a view that is part of the sanitize_views array
+ */
+class SanitizeRewriter extends DomRewriter {
+  /**
+   * Hooks up the script observer, but only if the current view is in the sanitize_views array (a non-array setting registers nothing)
+   *
+   * @param GadgetRewriter $gadgetRewriter
+   */
+  public function register(GadgetRewriter &$gadgetRewriter) {
+    $sanitizeViews = Config::get('sanitize_views');
+    if (is_array($sanitizeViews) && in_array($this->context->getView(), $sanitizeViews)) {
+      $gadgetRewriter->addObserver('script', $this, 'rewriteScript');
+    }
+  }
+
+  /** Drops the script's src and content in place (the element itself stays); inline event handlers on other tags are not touched */
+  public function rewriteScript(DOMElement $node) {
+    $node->removeAttribute('src');
+    while ($node->firstChild !== null) {
+      $node->removeChild($node->firstChild);
+    }
+  }
+}
\ No newline at end of file

Modified: incubator/shindig/trunk/php/src/gadgets/servlet/GadgetRenderingServlet.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/gadgets/servlet/GadgetRenderingServlet.php?rev=747034&r1=747033&r2=747034&view=diff
==============================================================================
--- incubator/shindig/trunk/php/src/gadgets/servlet/GadgetRenderingServlet.php (original)
+++ incubator/shindig/trunk/php/src/gadgets/servlet/GadgetRenderingServlet.php Mon Feb 23 14:41:31 2009
@@ -37,6 +37,8 @@
 require_once 'src/gadgets/GadgetSpec.php';
 require_once 'src/gadgets/Gadget.php';
 require_once 'src/gadgets/render/GadgetRenderer.php';
+require_once 'src/gadgets/rewrite/GadgetRewriter.php';
+require_once 'src/gadgets/rewrite/DomRewriter.php';
 
 class GadgetRenderingServlet extends HttpServlet {
   private $context;