You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@locus.apache.org on 2000/07/21 05:31:44 UTC
cvs commit: xml-xerces/c/src/util/NetAccessors/Socket UnixHTTPURLInputStream.cpp UnixHTTPURLInputStream.hpp
andyh 00/07/20 20:31:44
Modified: c/src/util/NetAccessors/Socket UnixHTTPURLInputStream.cpp
UnixHTTPURLInputStream.hpp
Log:
Improved (but still weak) http access by the parser.
Revision Changes Path
1.5 +141 -63 xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.cpp
Index: UnixHTTPURLInputStream.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- UnixHTTPURLInputStream.cpp 2000/05/15 22:31:28 1.4
+++ UnixHTTPURLInputStream.cpp 2000/07/21 03:31:41 1.5
@@ -1,37 +1,37 @@
/*
* The Apache Software License, Version 1.1
- *
+ *
* Copyright (c) 1999-2000 The Apache Software Foundation. All rights
* reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- *
+ *
* 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
+ * notice, this list of conditions and the following disclaimer.
+ *
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- *
+ *
* 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
+ * if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
- *
+ *
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
- * software without prior written permission. For written
+ * software without prior written permission. For written
* permission, please contact apache\@apache.org.
- *
+ *
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
- *
+ *
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -45,7 +45,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
- *
+ *
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
@@ -56,6 +56,9 @@
/*
* $Log: UnixHTTPURLInputStream.cpp,v $
+ * Revision 1.5 2000/07/21 03:31:41 andyh
+ * Improved (but still weak) http access by the parser.
+ *
* Revision 1.4 2000/05/15 22:31:28 andyh
* Replace #include<memory.h> with <string.h> everywhere.
*
@@ -102,52 +105,37 @@
-//
-// This define specifies the size of the buffer used to read chunks
-// out of the URL input stream.
-//
-
-#define URLISBUFMAXSIZE 8192
-
-
-//
-// URL's, as per the standards, is essentially composed of just ASCII characters
-// and hence converting it to a 'char *' requires just to drop the leading zero
-// byte. However, the URL's have to be 'escaped', meaning that certain unsafe
-// and reserved characters have to be escaped to their corresponding hex values.
-//
-// The input Unicode string is assumed to be 0 terminated.
-// The caller is responsible to free the memory allocated to store the resultant
-// 'char *' string.
-//
-
-static char* localTranscode(const XMLCh* latinStrInUnicode)
-{
- unsigned int lent = XMLString::stringLen(latinStrInUnicode);
- char* retval = new char[lent + 1];
- unsigned int i = 0;
- for (i = 0; i < lent; i++)
- retval[i] = (char) latinStrInUnicode[i]; // drop the leading byte.
- retval[lent] = 0;
- return retval;
-}
-
UnixHTTPURLInputStream::UnixHTTPURLInputStream(const XMLURL& urlSource)
: fSocket(0)
, fBytesProcessed(0)
{
- const XMLCh* uri = urlSource.getURLText();
- char* uriAsCharStar = localTranscode(uri);
- ArrayJanitor<char> janBuf(uriAsCharStar);
+ //
+ // Pull all of the parts of the URL out of the urlSource object,
+ // and transcode them back to ASCII.
+ //
const XMLCh* hostName = urlSource.getHost();
- char* hostNameAsCharStar = localTranscode(hostName);
+ char* hostNameAsCharStar = XMLString::transcode(hostName);
ArrayJanitor<char> janBuf1(hostNameAsCharStar);
+
+ const XMLCh* path = urlSource.getPath();
+ char* pathAsCharStar = XMLString::transcode(path);
+ ArrayJanitor<char> janBuf2(pathAsCharStar);
+
+ const XMLCh* fragment = urlSource.getFragment();
+ char* fragmentAsCharStar = 0;
+ if (fragment)
+ fragmentAsCharStar = XMLString::transcode(fragment);
+ ArrayJanitor<char> janBuf3(fragmentAsCharStar);
+
unsigned short portNumber = (unsigned short) urlSource.getPortNum();
+
+ //
+ // Set up a socket.
+ //
struct hostent* hostEntPtr = 0;
struct sockaddr_in sa;
- char obuf[1024]; // URL's should be < 1018 bytes.
if ((hostEntPtr = gethostbyname(hostNameAsCharStar)) == NULL)
@@ -158,8 +146,8 @@
ThrowXML(NetAccessorException,
XMLExcepts::NetAcc_TargetResolution);
}
- if ((hostEntPtr =
- gethostbyaddr((const char *) &numAddress,
+ if ((hostEntPtr =
+ gethostbyaddr((const char *) &numAddress,
sizeof(unsigned long), AF_INET)) == NULL)
{
ThrowXML(NetAccessorException,
@@ -183,19 +171,99 @@
{
ThrowXML(NetAccessorException,
XMLExcepts::NetAcc_ConnSocket);
+ }
+
+ // The port is open and ready to go.
+ // Build up the http GET command to send to the server.
+ // To do: We should really support http 1.1. This implementation
+ // is weak.
+ strcpy(fBuffer, "GET ");
+ strcat(fBuffer, pathAsCharStar);
+
+ if (fragmentAsCharStar != 0)
+ {
+ strcat(fBuffer, fragmentAsCharStar);
}
-
- // Now you can simply read and write from/to the socket.
+ strcat(fBuffer, " HTTP/1.0\r\n");
- sprintf(obuf, "GET %s\n\n", uriAsCharStar);
- int lent = strlen(obuf);
+
+ strcat(fBuffer, "Host: ");
+ strcat(fBuffer, hostNameAsCharStar);
+ if (portNumber != 80)
+ {
+ int i = strlen(fBuffer);
+ sprintf(fBuffer+i, "%d", portNumber);
+ // _itoa(portNumber, fBuffer+i, 10);
+ }
+ strcat(fBuffer, "\r\n\r\n");
+
+ // Send the http request
+ int lent = strlen(fBuffer);
int aLent = 0;
- if ((aLent = write(s, (void *) obuf, lent)) != lent)
+ if ((aLent = write(s, (void *) fBuffer, lent)) != lent)
{
ThrowXML(NetAccessorException,
XMLExcepts::NetAcc_WriteSocket);
}
+ //
+ // get the response, check the http header for errors from the server.
+ //
+ aLent = read(s, (void *)fBuffer, sizeof(fBuffer)-1);
+ if (aLent <= 0)
+ {
+ ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
+ }
+
+ fBufferEnd = fBuffer+aLent;
+ *fBufferEnd = 0;
+
+ // Find the break between the returned http header and any data.
+ // (Delimited by a blank line)
+ // Hang on to any data for use by the first read from this UnixHTTPURLInputStream.
+ //
+ fBufferPos = strstr(fBuffer, "\r\n\r\n");
+ if (fBufferPos != 0)
+ {
+ fBufferPos += 4;
+ *(fBufferPos-2) = 0;
+ }
+ else
+ {
+ fBufferPos = strstr(fBuffer, "\n\n");
+ if (fBufferPos != 0)
+ {
+ fBufferPos += 2;
+ *(fBufferPos-1) = 0;
+ }
+ else
+ fBufferPos = fBufferEnd;
+ }
+
+ // Make sure the header includes an HTTP 200 OK response.
+ //
+ char *p = strstr(fBuffer, "HTTP");
+ if (p == 0)
+ {
+ ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
+ }
+
+ p = strchr(p, ' ');
+ if (p == 0)
+ {
+ ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
+ }
+
+ int httpResponse = atoi(p);
+ if (httpResponse != 200)
+ {
+ // Most likely a 404 Not Found error.
+ // Should recognize and handle the forwarding responses.
+ //
+ ThrowXML(NetAccessorException, XMLExcepts::File_CouldNotOpenFile);
+ }
+
+
fSocket = s;
}
@@ -212,19 +280,29 @@
unsigned int UnixHTTPURLInputStream::readBytes(XMLByte* const toFill
, const unsigned int maxToRead)
{
- unsigned int retval = 0;
-
- int lent = read(fSocket, (void *) toFill, maxToRead);
-
- if (lent < 0)
+ unsigned int len = fBufferEnd - fBufferPos;
+ if (len > 0)
{
- ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
+ // If there's any data left over in the buffer into which we first
+ // read from the server (to get the http header), return that.
+ if (len > maxToRead)
+ len = maxToRead;
+ memcpy(toFill, fBufferPos, len);
+ fBufferPos += len;
}
else
{
- retval = lent;
- fBytesProcessed += retval;
+ // There was no data in the local buffer.
+ // Read some from the socket, straight into our caller's buffer.
+ //
+ len = read(fSocket, (void *) toFill, maxToRead);
+ if (len == -1)
+ {
+ ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
+ }
}
- return retval;
+ fBytesProcessed += len;
+ return len;
}
+
1.3 +28 -12 xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.hpp
Index: UnixHTTPURLInputStream.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.hpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- UnixHTTPURLInputStream.hpp 2000/03/22 00:58:12 1.2
+++ UnixHTTPURLInputStream.hpp 2000/07/21 03:31:42 1.3
@@ -1,37 +1,37 @@
/*
* The Apache Software License, Version 1.1
- *
+ *
* Copyright (c) 1999-2000 The Apache Software Foundation. All rights
* reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- *
+ *
* 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
+ * notice, this list of conditions and the following disclaimer.
+ *
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- *
+ *
* 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
+ * if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
- *
+ *
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
- * software without prior written permission. For written
+ * software without prior written permission. For written
* permission, please contact apache\@apache.org.
- *
+ *
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
- *
+ *
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -45,7 +45,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
- *
+ *
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* originally based on software copyright (c) 1999, International
@@ -56,6 +56,9 @@
/*
* $Log: UnixHTTPURLInputStream.hpp,v $
+ * Revision 1.3 2000/07/21 03:31:42 andyh
+ * Improved (but still weak) http access by the parser.
+ *
* Revision 1.2 2000/03/22 00:58:12 rahulj
* Now we throw exceptions when errors occur.
* Simplified code based on assumption that calling
@@ -111,10 +114,20 @@
// fBytesProcessed
// Its a rolling count of the number of bytes processed off this
// input stream.
+ // fBuffer
+ // Holds the http header, plus the first part of the actual
+ // data. Filled at the time the stream is opened, data goes
+ // out to user in response to readBytes().
+ // fBufferPos, fBufferEnd
+ // Pointers into fBuffer, showing start and end+1 of content
+ // that readBytes must return.
// -----------------------------------------------------------------------
int fSocket;
unsigned int fBytesProcessed;
+ char fBuffer[4000];
+ char * fBufferEnd;
+ char * fBufferPos;
}; // UnixHTTPURLInputStream