You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@locus.apache.org on 2000/07/21 05:31:44 UTC

cvs commit: xml-xerces/c/src/util/NetAccessors/Socket UnixHTTPURLInputStream.cpp UnixHTTPURLInputStream.hpp

andyh       00/07/20 20:31:44

  Modified:    c/src/util/NetAccessors/Socket UnixHTTPURLInputStream.cpp
                        UnixHTTPURLInputStream.hpp
  Log:
  Improved (but still weak) http access by the parser.
  
  Revision  Changes    Path
  1.5       +141 -63   xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.cpp
  
  Index: UnixHTTPURLInputStream.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.cpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- UnixHTTPURLInputStream.cpp	2000/05/15 22:31:28	1.4
  +++ UnixHTTPURLInputStream.cpp	2000/07/21 03:31:41	1.5
  @@ -1,37 +1,37 @@
   /*
    * The Apache Software License, Version 1.1
  - * 
  + *
    * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
    * reserved.
  - * 
  + *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
  - * 
  + *
    * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer. 
  - * 
  + *    notice, this list of conditions and the following disclaimer.
  + *
    * 2. Redistributions in binary form must reproduce the above copyright
    *    notice, this list of conditions and the following disclaimer in
    *    the documentation and/or other materials provided with the
    *    distribution.
  - * 
  + *
    * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:  
  + *    if any, must include the following acknowledgment:
    *       "This product includes software developed by the
    *        Apache Software Foundation (http://www.apache.org/)."
    *    Alternately, this acknowledgment may appear in the software itself,
    *    if and wherever such third-party acknowledgments normally appear.
  - * 
  + *
    * 4. The names "Xerces" and "Apache Software Foundation" must
    *    not be used to endorse or promote products derived from this
  - *    software without prior written permission. For written 
  + *    software without prior written permission. For written
    *    permission, please contact apache\@apache.org.
  - * 
  + *
    * 5. Products derived from this software may not be called "Apache",
    *    nor may "Apache" appear in their name, without prior written
    *    permission of the Apache Software Foundation.
  - * 
  + *
    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  @@ -45,7 +45,7 @@
    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    * SUCH DAMAGE.
    * ====================================================================
  - * 
  + *
    * This software consists of voluntary contributions made by many
    * individuals on behalf of the Apache Software Foundation, and was
    * originally based on software copyright (c) 1999, International
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: UnixHTTPURLInputStream.cpp,v $
  + * Revision 1.5  2000/07/21 03:31:41  andyh
  + * Improved (but still weak) http access by the parser.
  + *
    * Revision 1.4  2000/05/15 22:31:28  andyh
    * Replace #include<memory.h> with <string.h> everywhere.
    *
  @@ -102,52 +105,37 @@
   
   
   
  -//
  -// This define specifies the size of the buffer used to read chunks
  -// out of the URL input stream.
  -//
  -
  -#define URLISBUFMAXSIZE        8192
  -
  -
  -//
  -// URL's, as per the standards, is essentially composed of just ASCII characters
  -// and hence converting it to a 'char *' requires just to drop the leading zero
  -// byte. However, the URL's have to be 'escaped', meaning that certain unsafe
  -// and reserved characters have to be escaped to their corresponding hex values.
  -//
  -// The input Unicode string is assumed to be 0 terminated.
  -// The caller is responsible to free the memory allocated to store the resultant
  -// 'char *' string.
  -//
  -
  -static char* localTranscode(const XMLCh* latinStrInUnicode)
  -{
  -    unsigned int   lent = XMLString::stringLen(latinStrInUnicode);
  -    char*  retval = new char[lent + 1];
  -    unsigned int  i = 0;
  -    for (i = 0; i < lent; i++)
  -        retval[i] = (char) latinStrInUnicode[i]; // drop the leading byte.
  -    retval[lent] = 0;
  -    return retval;
  -}
  -
   
   
   UnixHTTPURLInputStream::UnixHTTPURLInputStream(const XMLURL& urlSource)
         : fSocket(0)
         , fBytesProcessed(0)
   {
  -    const XMLCh*        uri = urlSource.getURLText();
  -    char*               uriAsCharStar = localTranscode(uri);
  -    ArrayJanitor<char>  janBuf(uriAsCharStar);
  +    //
  +    // Pull all of the parts of the URL out of th urlSource object, and transcode them
  +    //   and transcode them back to ASCII.
  +    //
       const XMLCh*        hostName = urlSource.getHost();
  -    char*               hostNameAsCharStar = localTranscode(hostName);
  +    char*               hostNameAsCharStar = XMLString::transcode(hostName);
       ArrayJanitor<char>  janBuf1(hostNameAsCharStar);
  +
  +    const XMLCh*        path = urlSource.getPath();
  +    char*               pathAsCharStar = XMLString::transcode(path);
  +    ArrayJanitor<char>  janBuf2(pathAsCharStar);
  +
  +    const XMLCh*        fragment = urlSource.getFragment();
  +    char*               fragmentAsCharStar = 0;
  +    if (fragment)
  +        fragmentAsCharStar = XMLString::transcode(fragment);
  +    ArrayJanitor<char>  janBuf3(fragmentAsCharStar);
  +
       unsigned short      portNumber = (unsigned short) urlSource.getPortNum();
  +
  +    //
  +    // Set up a socket.
  +    //
       struct hostent*     hostEntPtr = 0;
       struct sockaddr_in  sa;
  -    char                obuf[1024];  // URL's should be < 1018 bytes.
   
   
       if ((hostEntPtr = gethostbyname(hostNameAsCharStar)) == NULL)
  @@ -158,8 +146,8 @@
               ThrowXML(NetAccessorException,
                        XMLExcepts::NetAcc_TargetResolution);
           }
  -        if ((hostEntPtr = 
  -                gethostbyaddr((const char *) &numAddress, 
  +        if ((hostEntPtr =
  +                gethostbyaddr((const char *) &numAddress,
                                 sizeof(unsigned long), AF_INET)) == NULL)
           {
               ThrowXML(NetAccessorException,
  @@ -183,19 +171,99 @@
       {
           ThrowXML(NetAccessorException,
                    XMLExcepts::NetAcc_ConnSocket);
  +    }
  +
  +    // The port is open and ready to go.
  +    // Build up the http GET command to send to the server.
  +    // To do:  We should really support http 1.1.  This implementation
  +    //         is weak.
  +    strcpy(fBuffer, "GET ");
  +    strcat(fBuffer, pathAsCharStar);
  +
  +    if (fragmentAsCharStar != 0)
  +    {
  +        strcat(fBuffer, fragmentAsCharStar);
       }
  -    
  -    // Now you can simply read and write from/to the socket.
  +    strcat(fBuffer, " HTTP/1.0\r\n");
   
  -    sprintf(obuf, "GET %s\n\n", uriAsCharStar);
  -    int lent = strlen(obuf);
  +
  +    strcat(fBuffer, "Host: ");
  +    strcat(fBuffer, hostNameAsCharStar);
  +    if (portNumber != 80)
  +    {
  +        int i = strlen(fBuffer);
  +		sprintf(fBuffer+i, "%d", portNumber);
  +        // _itoa(portNumber, fBuffer+i, 10);
  +    }
  +    strcat(fBuffer, "\r\n\r\n");
  +
  +    // Send the http request
  +    int lent = strlen(fBuffer);
       int  aLent = 0;
  -    if ((aLent = write(s, (void *) obuf, lent)) != lent)
  +    if ((aLent = write(s, (void *) fBuffer, lent)) != lent)
       {
           ThrowXML(NetAccessorException,
                    XMLExcepts::NetAcc_WriteSocket);
       }
   
  +    //
  +    // get the response, check the http header for errors from the server.
  +    //
  +    aLent = read(s, (void *)fBuffer, sizeof(fBuffer)-1);
  +    if (aLent <= 0)
  +    {
  +        ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
  +    }
  +
  +    fBufferEnd = fBuffer+aLent;
  +    *fBufferEnd = 0;
  +
  +    // Find the break between the returned http header and any data.
  +    //  (Delimited by a blank line)
  +    // Hang on to any data for use by the first read from this BinHTTPURLInputStream.
  +    //
  +    fBufferPos = strstr(fBuffer, "\r\n\r\n");
  +    if (fBufferPos != 0)
  +    {
  +        fBufferPos += 4;
  +        *(fBufferPos-2) = 0;
  +    }
  +    else
  +    {
  +        fBufferPos = strstr(fBuffer, "\n\n");
  +        if (fBufferPos != 0)
  +        {
  +            fBufferPos += 2;
  +            *(fBufferPos-1) = 0;
  +        }
  +        else
  +            fBufferPos = fBufferEnd;
  +    }
  +
  +    // Make sure the header includes an HTTP 200 OK response.
  +    //
  +    char *p = strstr(fBuffer, "HTTP");
  +    if (p == 0)
  +    {
  +        ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
  +    }
  +
  +    p = strchr(p, ' ');
  +    if (p == 0)
  +    {
  +        ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
  +    }
  +
  +    int httpResponse = atoi(p);
  +    if (httpResponse != 200)
  +    {
  +        // Most likely a 404 Not Found error.
  +        //   Should recognize and handle the forwarding responses.
  +        //
  +        ThrowXML(NetAccessorException, XMLExcepts::File_CouldNotOpenFile);
  +    }
  +
  +
       fSocket = s;
   
   }
  @@ -212,19 +280,29 @@
   unsigned int UnixHTTPURLInputStream::readBytes(XMLByte* const    toFill
                                         , const unsigned int    maxToRead)
   {
  -    unsigned int  retval = 0;
  -
  -    int lent = read(fSocket, (void *) toFill, maxToRead); 
  -
  -    if (lent < 0)
  +    unsigned int len = fBufferEnd - fBufferPos;
  +    if (len > 0)
       {
  -        ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
  +        // If there's any data left over in the buffer into which we first
  +        //   read from the server (to get the http header), return that.
  +        if (len > maxToRead)
  +            len = maxToRead;
  +        memcpy(toFill, fBufferPos, len);
  +        fBufferPos += len;
       }
       else
       {
  -        retval = lent;
  -        fBytesProcessed += retval;
  +        // There was no data in the local buffer.
  +        // Read some from the socket, straight into our caller's buffer.
  +        //
  +        len = read(fSocket, (void *) toFill, maxToRead);
  +        if (len == -1)
  +        {
  +            ThrowXML(NetAccessorException, XMLExcepts::NetAcc_ReadSocket);
  +        }
       }
   
  -    return retval;
  +    fBytesProcessed += len;
  +    return len;
   }
  +
  
  
  
  1.3       +28 -12    xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.hpp
  
  Index: UnixHTTPURLInputStream.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/util/NetAccessors/Socket/UnixHTTPURLInputStream.hpp,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- UnixHTTPURLInputStream.hpp	2000/03/22 00:58:12	1.2
  +++ UnixHTTPURLInputStream.hpp	2000/07/21 03:31:42	1.3
  @@ -1,37 +1,37 @@
   /*
    * The Apache Software License, Version 1.1
  - * 
  + *
    * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
    * reserved.
  - * 
  + *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
  - * 
  + *
    * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer. 
  - * 
  + *    notice, this list of conditions and the following disclaimer.
  + *
    * 2. Redistributions in binary form must reproduce the above copyright
    *    notice, this list of conditions and the following disclaimer in
    *    the documentation and/or other materials provided with the
    *    distribution.
  - * 
  + *
    * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:  
  + *    if any, must include the following acknowledgment:
    *       "This product includes software developed by the
    *        Apache Software Foundation (http://www.apache.org/)."
    *    Alternately, this acknowledgment may appear in the software itself,
    *    if and wherever such third-party acknowledgments normally appear.
  - * 
  + *
    * 4. The names "Xerces" and "Apache Software Foundation" must
    *    not be used to endorse or promote products derived from this
  - *    software without prior written permission. For written 
  + *    software without prior written permission. For written
    *    permission, please contact apache\@apache.org.
  - * 
  + *
    * 5. Products derived from this software may not be called "Apache",
    *    nor may "Apache" appear in their name, without prior written
    *    permission of the Apache Software Foundation.
  - * 
  + *
    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
    * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  @@ -45,7 +45,7 @@
    * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    * SUCH DAMAGE.
    * ====================================================================
  - * 
  + *
    * This software consists of voluntary contributions made by many
    * individuals on behalf of the Apache Software Foundation, and was
    * originally based on software copyright (c) 1999, International
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: UnixHTTPURLInputStream.hpp,v $
  + * Revision 1.3  2000/07/21 03:31:42  andyh
  + * Improved (but still weak) http access by the parser.
  + *
    * Revision 1.2  2000/03/22 00:58:12  rahulj
    * Now we throw exceptions when errors occur.
    * Simplified code based on assumption that calling
  @@ -111,10 +114,20 @@
       //  fBytesProcessed
       //      Its a rolling count of the number of bytes processed off this
       //      input stream.
  +    //  fBuffer
  +    //      Holds the http header, plus the first part of the actual
  +    //      data.  Filled at the time the stream is opened, data goes
  +    //      out to user in response to readBytes().
  +    //  fBufferPos, fBufferEnd
  +    //      Pointers into fBuffer, showing start and end+1 of content
  +    //      that readBytes must return.
       // -----------------------------------------------------------------------
   
       int                 fSocket;
       unsigned int        fBytesProcessed;
  +    char                fBuffer[4000];
  +    char *              fBufferEnd;
  +    char *              fBufferPos;
   
   }; // UnixHTTPURLInputStream