You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-dev@axis.apache.org by sh...@apache.org on 2009/09/08 13:30:32 UTC

svn commit: r812468 - /webservices/axis2/trunk/c/guththila/src/guththila_xml_parser.c

Author: shankar
Date: Tue Sep  8 11:30:32 2009
New Revision: 812468

URL: http://svn.apache.org/viewvc?rev=812468&view=rev
Log:
improving performance

Modified:
    webservices/axis2/trunk/c/guththila/src/guththila_xml_parser.c

Modified: webservices/axis2/trunk/c/guththila/src/guththila_xml_parser.c
URL: http://svn.apache.org/viewvc/webservices/axis2/trunk/c/guththila/src/guththila_xml_parser.c?rev=812468&r1=812467&r2=812468&view=diff
==============================================================================
--- webservices/axis2/trunk/c/guththila/src/guththila_xml_parser.c (original)
+++ webservices/axis2/trunk/c/guththila/src/guththila_xml_parser.c Tue Sep  8 11:30:32 2009
@@ -31,36 +31,66 @@
     guththila_t * m,
     const axutil_env_t * env);
 
+/*
+ * Read until a '<' character is found or a negative value is read; returns that value
+ */
+static int
+guththila_search_for_start_element(
+    guththila_t *m,
+    const axutil_env_t *env);
+
 /* part of guththila_next_char method. this was included as macro for performance. 99% of the time
  * following will be called, so having it as next_char method is very expensive (method calling
  * overhead is higher) so, common case is checked as part of the macro and if not satisfied, method
  * is called
  */
-#define GUTHTHILA_NEXT_CHAR(m, reader_type, env, c)\
+#define GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c)\
 {\
-    if(reader_type == GUTHTHILA_MEMORY_READER)\
+    int result_found = 0;\
+    if(!buffer)\
     {\
-        int index = m->next++;\
-        if(index < m->buffer.data_size[0])\
+        if(m->reader->type == GUTHTHILA_MEMORY_READER)\
         {\
-            c = m->buffer.buff[0][index];\
+            buffer = m->buffer.buff[0];\
+            data_size = m->buffer.data_size[0];\
+            previous_size = 0;\
         }\
         else\
         {\
-            c = -1;\
+            if(m->buffer.cur_buff != -1)\
+            {\
+                buffer = m->buffer.buff[m->buffer.cur_buff];\
+                data_size = GUTHTHILA_BUFFER_CURRENT_DATA_SIZE(m->buffer);\
+                previous_size = GUTHTHILA_BUFFER_PRE_DATA_SIZE(m->buffer);\
+            }\
+            else\
+            {\
+                c = guththila_next_char(m, env);\
+                result_found  = 1;\
+            }\
         }\
     }\
-    else\
+\
+    if(!result_found)\
     {\
-        if(m->buffer.cur_buff != -1 && m->next < GUTHTHILA_BUFFER_PRE_DATA_SIZE(m->buffer)\
-            + GUTHTHILA_BUFFER_CURRENT_DATA_SIZE(m->buffer))\
+        int index = m->next++ - previous_size;\
+        if(index < data_size)\
         {\
-            c = m->buffer.buff[m->buffer.cur_buff][m->next++ - GUTHTHILA_BUFFER_PRE_DATA_SIZE(\
-                m->buffer)];\
+            c = buffer[index];\
         }\
         else\
         {\
-            c = guththila_next_char(m, env);\
+            buffer = NULL;\
+            data_size = -1;\
+            --(m->next);\
+            if(m->reader->type == GUTHTHILA_MEMORY_READER)\
+            {\
+                c = -1;\
+            }\
+            else\
+            {\
+                c = guththila_next_char(m, env);\
+            }\
         }\
     }\
 }
@@ -99,14 +129,14 @@
  * Read characters until all the white spaces are read.
  */
 #ifndef GUTHTHILA_SKIP_SPACES
-#define GUTHTHILA_SKIP_SPACES(m, c, reader_type, _env)while(0x20 == c || 0x9 == c || 0xD == c || 0xA == c){GUTHTHILA_NEXT_CHAR(m, reader_type, _env, c);}
+#define GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, _env)while(0x20 == c || 0x9 == c || 0xD == c || 0xA == c){GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, _env, c);}
 #endif  
 
 /*
  * Read character including new line until a non white space character is met.
  */
 #ifndef GUTHTHILA_SKIP_SPACES_WITH_NEW_LINE
-#define GUTHTHILA_SKIP_SPACES_WITH_NEW_LINE(m, c, reader_type, _env) while (0x20 == c || 0x9 == c || 0xD == c || 0xA == c || '\n' == c){GUTHTHILA_NEXT_CHAR(m, reader_type, _env, c);}
+#define GUTHTHILA_SKIP_SPACES_WITH_NEW_LINE(m, c, buffer, data_size, previous_size, _env) while (0x20 == c || 0x9 == c || 0xD == c || 0xA == c || '\n' == c){GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, _env, c);}
 #endif  
 
 #ifndef GUTHTHILA_XML_NAME
@@ -141,11 +171,11 @@
  * Read until we met a = character.
  */
 #ifndef GUTHTHILA_PROCESS_EQU
-#define GUTHTHILA_PROCESS_EQU(m, c, ic, reader_type, _env)							\
-    GUTHTHILA_SKIP_SPACES(m, c, reader_type, _env); \
+#define GUTHTHILA_PROCESS_EQU(m, c, ic, buffer, data_size, previous_size, _env)							\
+    GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, _env); \
     if (0x3D == c) { \
-    GUTHTHILA_NEXT_CHAR(m, reader_type, _env, ic); \
-    GUTHTHILA_SKIP_SPACES(m, ic, reader_type, _env); \
+    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, _env, ic); \
+    GUTHTHILA_SKIP_SPACES(m, ic, buffer, data_size, previous_size, _env); \
     }
 #endif  
 
@@ -174,7 +204,7 @@
  * return false immediately.
  */
 #ifndef GUTHTHILA_IS_SPACE
-#define GUTHTHILA_IS_SPACE(c) ((c < 0x21) && (0x20 == c || 0xD == c || 0xA == c || 0x9 == c))
+#define GUTHTHILA_IS_SPACE(c) ((c < 0x21) && (c == 0x20 || c == 0xD || c == 0xA || c == 0x9))
 #endif  
 
 /*
@@ -699,7 +729,10 @@
     int c = -1;
     guththila_attr_t * attr = NULL;
     int size = 0, i = 0, nmsp_counter, loop = 0, white_space = 0;
-    int reader_type = m->reader->type;
+    int data_size = -1;
+    int previous_size = -1;
+    guththila_char_t *buffer = NULL;
+
     /* Need to release the resources for attributes */
     size = GUTHTHILA_STACK_SIZE(m->attrib);
     for(i = 0; i < size; i++)
@@ -755,12 +788,12 @@
     do
     {
         loop = 0;
-        GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+        GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
         if(m->status == S_1)
         {
             while(isspace(c))
             {
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                 if(c < 0)
                     return -1;
             }
@@ -775,27 +808,27 @@
         }
         if('<' == c && m->status == S_2)
         {
-            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
             if(c != '?' && c != '!' && c != '/')
             {
                 /* We are at the beginig of a xml element */
                 if(GUTHTHILA_IS_VALID_STARTING_CHAR(c))
                 {
                     GUTHTHILA_TOKEN_OPEN(m, tok, env);
-                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                     while(!GUTHTHILA_IS_SPACE(c) && c != '>' && c != '/')
                     {
                         if(c < 0)
                             return -1;
                         if(c != ':')
                         {
-                            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                         }
                         else
                         {
                             /* We know for sure that this is a prefix */
                             guththila_token_close(m, tok, _prefix, 0, env);
-                            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                             GUTHTHILA_TOKEN_OPEN(m, tok, env);
                         }
                     }
@@ -810,14 +843,14 @@
                     guththila_stack_push(&m->elem, elem, env);
 #endif  
                 }
-                GUTHTHILA_SKIP_SPACES(m, c, reader_type, env);
+                GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, env);
                 /* Process the attributes */
                 for(;;)
                 {
                     /* Empty element */
                     if(c == '/')
                     {
-                        GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                        GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                         if(c == '>')
                         {
                             m->guththila_event = GUTHTHILA_EMPTY_ELEMENT;
@@ -848,20 +881,20 @@
                         {
                             GUTHTHILA_TOKEN_OPEN(m, tok, env);
 
-                            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                             while(!GUTHTHILA_IS_SPACE(c) && c != '=')
                             {
                                 if(c < 0)
                                     return -1;
                                 if(c != ':')
                                 {
-                                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                                 }
                                 else if(c == ':')
                                 {
                                     /* Prefix */
                                     guththila_token_close(m, tok, _prefix, 0, env);
-                                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                                     GUTHTHILA_TOKEN_OPEN(m, tok, env);
                                 }
                             }
@@ -873,20 +906,20 @@
                             return -1;
                         }
                         /* Attribute Value */
-                        GUTHTHILA_PROCESS_EQU(m, c, quote, reader_type, env);
+                        GUTHTHILA_PROCESS_EQU(m, c, quote, buffer, data_size, previous_size, env);
                         if('\'' == quote || '\"' == quote)
                         {
-                            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                             GUTHTHILA_TOKEN_OPEN(m, tok, env);
                             while(c != quote)
                             {
                                 if(c < 0)
                                     return -1;
-                                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                             }
                             guththila_token_close(m, tok, _attribute_value, 0, env);
-                            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
-                            GUTHTHILA_SKIP_SPACES(m, c, reader_type, env);
+                            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+                            GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, env);
                         }
                         else
                         {
@@ -903,24 +936,24 @@
             {
                 /* End Element */
                 m->guththila_event = GUTHTHILA_END_ELEMENT;
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                 if(GUTHTHILA_IS_VALID_STARTING_CHAR(c))
                 {
                     GUTHTHILA_TOKEN_OPEN(m, tok, env);
-                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                     while(!GUTHTHILA_IS_SPACE(c) && c != '>')
                     {
                         if(c < 0)
                             return -1;
                         if(c != ':')
                         {
-                            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                         }
                         else
                         {
                             /* Prefix */
                             guththila_token_close(m, tok, _prefix, 0, env);
-                            GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                             GUTHTHILA_TOKEN_OPEN(m, tok, env);
                         }
                     }
@@ -963,7 +996,7 @@
                         guththila_tok_list_release_token(&m->tokens, elem->prefix, env);
                     AXIS2_FREE(env->allocator, elem);
 #endif  
-                    GUTHTHILA_SKIP_SPACES(m, c, reader_type, env);
+                    GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, env);
                     if(c != '>')
                         return -1;
                     return GUTHTHILA_END_ELEMENT;
@@ -977,11 +1010,11 @@
                     == c_arra[1])
                 {
                     int loop_state = 1;
-                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                     GUTHTHILA_TOKEN_OPEN(m, tok, env);
                     while(loop_state)
                     {
-                        GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                        GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                         if('-' == c)
                         {
                             if(2 == guththila_next_no_char(m, 0, c_arra, 2, env) && '-'
@@ -1006,12 +1039,12 @@
                 }
                 else
                 {
-                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                     while('<' != c)
                     {
                         if(c < 0)
                             return -1;
-                        GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                        GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                     }
                 }
             }
@@ -1035,15 +1068,40 @@
                 white_space = 1;
             GUTHTHILA_TOKEN_OPEN(m, tok, env);
 
+            /* The code below uses two inner do-while loops wrapped in an outer do-while loop.
+             * This improves performance for big messages. In most cases the content is not
+             * all whitespace, so continuing to check for whitespace after a non-whitespace
+             * character has already been found is a big overhead. Hence one inner loop runs
+             * while white_space is still true, and the other handles the normal case.
+             */
             do
             {
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
-                if(!GUTHTHILA_IS_SPACE(c) && c != '<')
-                    white_space = 0;
-                if(c < 0)
-                    return -1;
+                if(white_space)
+                {
+                    do
+                    {
+                        GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+                        if(!GUTHTHILA_IS_SPACE(c) && c != '<')
+                        {
+                            white_space = 0;
+                            break;
+                        }
+                    }
+                    while((c != '<') && (c >= 0));
+                }
+                else
+                {
+                    do
+                    {
+                        /*GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);*/
+                        c = guththila_search_for_start_element(m, env);
+                    }
+                    while((c != '<') && (c >= 0));
+                }
             }
-            while(c != '<');
+            while((c != '<') && (c >= 0));
+            if(c < 0)
+                return -1;
             guththila_token_close(m, tok, _text_data, ref, env);
             m->next--;
             if(white_space)
@@ -1085,12 +1143,14 @@
     int c = -1;
     int quote = -1;
     int nc = -1;
-    int reader_type = m->reader->type;
+    int data_size = -1;
+    int previous_size = -1;
+    guththila_char_t *buffer = NULL;
     if(3 == guththila_next_no_char(m, GUTHTHILA_EOF, c_arra, 3, env) && 'x' == c_arra[0] && 'm'
         == c_arra[1] && 'l' == c_arra[2])
     {
-        GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
-        GUTHTHILA_SKIP_SPACES(m, c, reader_type, env);
+        GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+        GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, env);
         if(c == 'v')
         {
             GUTHTHILA_TOKEN_OPEN(m, tok, env);
@@ -1098,20 +1158,20 @@
                 == c_arra[1] && 's' == c_arra[2] && 'i' == c_arra[3] && 'o' == c_arra[4] && 'n'
                 == c_arra[5])
             {
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                 guththila_token_close(m, tok, _attribute_name, 0, env);
-                GUTHTHILA_PROCESS_EQU(m, c, quote, reader_type, env);
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, nc);
+                GUTHTHILA_PROCESS_EQU(m, c, quote, buffer, data_size, previous_size, env);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, nc);
                 GUTHTHILA_TOKEN_OPEN(m, tok, env);
                 while(nc != quote)
                 {
                     if(nc < 0)
                         return -1;
-                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, nc);
+                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, nc);
                 }
                 guththila_token_close(m, tok, _attribute_value, 0, env);
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
-                GUTHTHILA_SKIP_SPACES(m, c, reader_type, env);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+                GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, env);
             }
             else
             {
@@ -1125,20 +1185,20 @@
                 == c_arra[1] && 'o' == c_arra[2] && 'd' == c_arra[3] && 'i' == c_arra[4] && 'n'
                 == c_arra[5] && 'g' == c_arra[6])
             {
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                 guththila_token_close(m, tok, _attribute_name, 0, env);
-                GUTHTHILA_PROCESS_EQU(m, c, quote, reader_type, env);
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, nc);
+                GUTHTHILA_PROCESS_EQU(m, c, quote, buffer, data_size, previous_size, env);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, nc);
                 GUTHTHILA_TOKEN_OPEN(m, tok, env);
                 while(nc != quote)
                 {
                     if(nc < 0)
                         return -1;
-                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                 }
                 guththila_token_close(m, tok, _attribute_value, 0, env);
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
-                GUTHTHILA_SKIP_SPACES(m, c, reader_type, env);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+                GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, env);
             }
         }
         if(c == 's')
@@ -1148,26 +1208,26 @@
                 == c_arra[1] && 'n' == c_arra[2] && 'd' == c_arra[3] && 'a' == c_arra[4] && 'l'
                 == c_arra[5] && 'o' == c_arra[6] && 'n' == c_arra[7] && 'e' == c_arra[8])
             {
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
                 guththila_token_close(m, tok, _attribute_name, 0, env);
-                GUTHTHILA_PROCESS_EQU(m, c, quote, reader_type, env);
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, nc);
+                GUTHTHILA_PROCESS_EQU(m, c, quote, buffer, data_size, previous_size, env);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, nc);
                 GUTHTHILA_TOKEN_OPEN(m, tok, env);
                 while(nc != quote)
                 {
                     if(nc < 0)
                         return -1;
-                    GUTHTHILA_NEXT_CHAR(m, reader_type, env, nc);
+                    GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, nc);
                 }
                 guththila_token_close(m, tok, _attribute_value, 0, env);
-                GUTHTHILA_NEXT_CHAR(m, reader_type, env, c);
-                GUTHTHILA_SKIP_SPACES(m, c, reader_type, env);
+                GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+                GUTHTHILA_SKIP_SPACES(m, c, buffer, data_size, previous_size, env);
             }
         }
         if(c == '?')
         {
             int nc;
-            GUTHTHILA_NEXT_CHAR(m, reader_type, env, nc);
+            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, nc);
             if('>' == nc)
             {
                 m->guththila_event = GUTHTHILA_START_DOCUMENT;
@@ -1756,3 +1816,40 @@
     return -1;
 }
 
+static int
+guththila_search_for_start_element(
+    guththila_t *m,
+    const axutil_env_t *env)
+{
+    guththila_char_t *buffer = NULL;
+    int data_size = -1;
+    int previous_size = -1;
+
+    int c;
+    do
+    {
+        if(buffer)
+        {
+            guththila_char_t *pos = NULL;
+            int index = m->next - previous_size;
+            pos = (guththila_char_t*)memchr(buffer + index, '<', data_size - index);
+            if(pos)
+            {
+                m->next += pos - (buffer + index);
+            }
+            else
+            {
+                m->next = previous_size + data_size;
+            }
+
+            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+        }
+        else
+        {
+            GUTHTHILA_NEXT_CHAR(m, buffer, data_size, previous_size, env, c);
+        }
+    }while((c != '<') && (c >= 0));
+
+    return c;
+}
+