You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by ch...@apache.org on 2009/09/20 23:53:33 UTC

svn commit: r817109 - /incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php

Author: chabotc
Date: Sun Sep 20 21:53:33 2009
New Revision: 817109

URL: http://svn.apache.org/viewvc?rev=817109&view=rev
Log:
Attempt to do intelligent recoding on any xml/text/html/json bodies to filter out invalid UTF-8 sequences. Invalid char sequences caused the labpixies todo gadget not to work.

Modified:
    incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php

Modified: incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php?rev=817109&r1=817108&r2=817109&view=diff
==============================================================================
--- incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php (original)
+++ incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php Sun Sep 20 21:53:33 2009
@@ -86,6 +86,29 @@
   private function parseResult(RemoteContentRequest $request, $content) {
     $headers = '';
     $body = '';
+    $httpCode = curl_getinfo($request->handle, CURLINFO_HTTP_CODE);
+    $contentType = curl_getinfo($request->handle, CURLINFO_CONTENT_TYPE);
+    // Attempt to magically convert all text'ish responses to UTF8, especially the xml and json parsers get upset if invalid UTF8 is encountered
+    $textTypes = array('text', 'html', 'json', 'xml', 'atom');
+    $isTextType = false;
+    foreach ($textTypes as $textType) {
+    	if (strpos($contentType, $textType) !== false) {
+    		$isTextType = true;
+    		break;
+    	}
+    }
+    if ($isTextType && function_exists('mb_convert_encoding')) {
+      $charset = 'UTF-8';
+   		preg_match("/charset\s*=\s*([^\"' >]*)/ix",$content, $charset);
+   		if (isset($charset[1])) {
+   			$charset = trim($charset[1]);
+   			if (($pos = strpos($charset, "\n")) !== false) {
+   			  $charset = trim(substr($charset, 0, $pos));
+   			}
+   		}
+   		// the xml and json parsers get very upset if there are invalid UTF8 sequences in the string, by recoding it any bad chars will be filtered out
+      $content = mb_convert_encoding($content, 'UTF-8', $charset);
+  	}
     // on redirects and such we get multiple headers back from curl it seems, we really only want the last one
     while (substr($content, 0, strlen('HTTP')) == 'HTTP' && strpos($content, "\r\n\r\n") !== false) {
       $headers = substr($content, 0, strpos($content, "\r\n\r\n"));
@@ -101,8 +124,6 @@
         $parsedHeaders[$key] = $val;
       }
     }
-    $httpCode = curl_getinfo($request->handle, CURLINFO_HTTP_CODE);
-    $contentType = curl_getinfo($request->handle, CURLINFO_CONTENT_TYPE);
     if (! $httpCode) {
       $httpCode = '404';
     }