You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by ch...@apache.org on 2009/09/20 23:53:33 UTC
svn commit: r817109 -
/incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php
Author: chabotc
Date: Sun Sep 20 21:53:33 2009
New Revision: 817109
URL: http://svn.apache.org/viewvc?rev=817109&view=rev
Log:
Attempt to do intelligent recoding on any xml/text/html/json bodies to filter out invalid UTF-8 sequences. Invalid character sequences caused the labpixies todo gadget not to work.
Modified:
incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php
Modified: incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php?rev=817109&r1=817108&r2=817109&view=diff
==============================================================================
--- incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php (original)
+++ incubator/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php Sun Sep 20 21:53:33 2009
@@ -86,6 +86,29 @@
private function parseResult(RemoteContentRequest $request, $content) {
$headers = '';
$body = '';
+ $httpCode = curl_getinfo($request->handle, CURLINFO_HTTP_CODE);
+ $contentType = curl_getinfo($request->handle, CURLINFO_CONTENT_TYPE);
+ // Attempt to magically convert all text'ish responses to UTF8, especially the xml and json parsers get upset if invalid UTF8 is encountered
+ $textTypes = array('text', 'html', 'json', 'xml', 'atom');
+ $isTextType = false;
+ foreach ($textTypes as $textType) {
+ if (strpos($contentType, $textType) !== false) {
+ $isTextType = true;
+ break;
+ }
+ }
+ if ($isTextType && function_exists('mb_convert_encoding')) {
+ $charset = 'UTF-8';
+ preg_match("/charset\s*=\s*([^\"' >]*)/ix",$content, $charset);
+ if (isset($charset[1])) {
+ $charset = trim($charset[1]);
+ if (($pos = strpos($charset, "\n")) !== false) {
+ $charset = trim(substr($charset, 0, $pos));
+ }
+ }
+ // the xml and json parsers get very upset if there are invalid UTF8 sequences in the string, by recoding it any bad chars will be filtered out
+ $content = mb_convert_encoding($content, 'UTF-8', $charset);
+ }
// on redirects and such we get multiple headers back from curl it seems, we really only want the last one
while (substr($content, 0, strlen('HTTP')) == 'HTTP' && strpos($content, "\r\n\r\n") !== false) {
$headers = substr($content, 0, strpos($content, "\r\n\r\n"));
@@ -101,8 +124,6 @@
$parsedHeaders[$key] = $val;
}
}
- $httpCode = curl_getinfo($request->handle, CURLINFO_HTTP_CODE);
- $contentType = curl_getinfo($request->handle, CURLINFO_CONTENT_TYPE);
if (! $httpCode) {
$httpCode = '404';
}