You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@daffodil.apache.org by GitBox <gi...@apache.org> on 2020/12/14 15:53:55 UTC

[GitHub] [incubator-daffodil] mbeckerle commented on a change in pull request #466: Expand C code generator to handle more integer types

mbeckerle commented on a change in pull request #466:
URL: https://github.com/apache/incubator-daffodil/pull/466#discussion_r542466233



##########
File path: daffodil-runtime2/src/main/resources/c/libcli/xml_reader.c
##########
@@ -184,10 +184,50 @@ xmlInt32Elem(XMLReader *reader, const ERD *erd, int32_t *location)
     {
         if (strcmp(name_from_xml, name_from_erd) == 0)
         {
-            // Check for any errors getting the 32-bit integer
+            // Check for any errors getting the integer number
             const char *errstr = NULL;
-            *location = (int32_t)strtonum(number_from_xml, INT32_MIN, INT32_MAX,
-                                          &errstr);
+
+            // Need to handle varying bit lengths and signedness

Review comment:
       Is this a TODO? If so add "TODO", otherwise remove "Need to"

##########
File path: daffodil-runtime2/src/main/resources/c/libcli/xml_writer.c
##########
@@ -90,29 +90,64 @@ xmlEndComplex(XMLWriter *writer, const InfosetBase *base)
     return complex ? NULL : "Underflowed the XML stack";
 }
 
-// Write 32-bit integer value as element
+// Write 8, 16, 32, or 64-bit signed/unsigned integer as element
 
 static const char *
-xmlInt32Elem(XMLWriter *writer, const ERD *erd, const int32_t *location)
+xmlIntegerElem(XMLWriter *writer, const ERD *erd, const void *intLocation)
 {
     mxml_node_t *parent = stack_top(&writer->stack);
     const char * name = get_erd_name(erd);
-    const char * xmlns = get_erd_xmlns(erd);
     mxml_node_t *simple = mxmlNewElement(parent, name);
-    int32_t      value = *location;
-    mxml_node_t *text = mxmlNewOpaquef(simple, "%i", value);
+
+    // Set namespace declaration if necessary
+    const char *xmlns = get_erd_xmlns(erd);
     if (xmlns)
     {
         const char *ns = get_erd_ns(erd);
         mxmlElementSetAttr(simple, xmlns, ns);
     }
-    return (simple && text) ? NULL : "Error making new int32 simple element";
+
+    // Need to handle varying bit lengths and signedness

Review comment:
       "need to" again

##########
File path: daffodil-runtime2/src/main/resources/examples/ex_int32.c
##########
@@ -25,15 +25,15 @@
 static void        c2_initSelf(c2 *instance);
 static const char *c2_parseSelf(c2 *instance, const PState *pstate);
 static const char *c2_unparseSelf(const c2 *instance, const UState *ustate);
-static void        c1_initSelf(c1 *instance);
-static const char *c1_parseSelf(c1 *instance, const PState *pstate);
-static const char *c1_unparseSelf(const c1 *instance, const UState *ustate);
+static void        ex_int32_initSelf(ex_int32 *instance);

Review comment:
       is this example for signed int 32 or unsigned. There's some mixing below. 

##########
File path: daffodil-runtime2/src/main/resources/examples/ex_int32.c
##########
@@ -99,42 +99,45 @@ static const ERD c2_ERD = {
     (ERDUnparseSelf)&c2_unparseSelf, // unparseSelf
 };
 
-static const c1 c1_compute_ERD_offsets;
+static const ex_int32 ex_int32_compute_ERD_offsets;
 
-static const ptrdiff_t c1_offsets[2] = {
-    (char *)&c1_compute_ERD_offsets.e1 - (char *)&c1_compute_ERD_offsets,
-    (char *)&c1_compute_ERD_offsets.c2 - (char *)&c1_compute_ERD_offsets};
+static const ptrdiff_t ex_int32_offsets[2] = {
+    (char *)&ex_int32_compute_ERD_offsets.e1 - (char *)&ex_int32_compute_ERD_offsets,
+    (char *)&ex_int32_compute_ERD_offsets.c2 - (char *)&ex_int32_compute_ERD_offsets};
 
-static const ERD *c1_childrenERDs[2] = {&e1_ERD, &c2_ERD};
+static const ERD *ex_int32_childrenERDs[2] = {&e1_ERD, &c2_ERD};
 
-static const ERD c1_ERD = {
+static const ERD ex_int32_ERD = {
     {
-        "ex",                 // namedQName.prefix
-        "c1",                 // namedQName.local
+        NULL, // namedQName.prefix
+        "ex_int32", // namedQName.local
         "http://example.com", // namedQName.ns
     },
     COMPLEX,                         // typeCode
     2,                               // numChildren
-    c1_offsets,                      // offsets
-    c1_childrenERDs,                 // childrenERDs
-    (ERDInitSelf)&c1_initSelf,       // initSelf
-    (ERDParseSelf)&c1_parseSelf,     // parseSelf
-    (ERDUnparseSelf)&c1_unparseSelf, // unparseSelf
+    ex_int32_offsets,                      // offsets
+    ex_int32_childrenERDs,                 // childrenERDs
+    (ERDInitSelf)&ex_int32_initSelf,       // initSelf
+    (ERDParseSelf)&ex_int32_parseSelf,     // parseSelf
+    (ERDUnparseSelf)&ex_int32_unparseSelf, // unparseSelf
 };
 
 // Return a root element to be used for parsing or unparsing
 
 InfosetBase *
 rootElement()
 {
-    static c1    instance;
+    static ex_int32    instance;
     InfosetBase *root = &instance._base;
-    c1_ERD.initSelf(root);
+    ex_int32_ERD.initSelf(root);
     return root;
 }
 
 // Methods to initialize, parse, and unparse infoset nodes
 
+static inline uint8_t be8toh(uint8_t be8b) { return be8b; }

Review comment:
       Add a comment. I don't understand the point of these functions that just return the arg. They appear to be degenerate cases of endian-sensitive integer transformations for the 1-byte case. Do you use these?
   
   In this naming schema "beNtoh" what's the "h" stand for?

##########
File path: daffodil-runtime2/src/main/resources/examples/ex_int32.c
##########
@@ -149,7 +152,7 @@ c2_parseSelf(c2 *instance, const PState *pstate)
     const char *error_msg = NULL;
     if (!error_msg)
     {
-        char   buffer[4];
+        char   buffer[sizeof(uint32_t)];

Review comment:
       Why does this use uint32_t instead of int32_t ? For consistency's sake, we should use int32_t I think. 
   

##########
File path: daffodil-runtime2/src/main/resources/examples/ex_int32.c
##########
@@ -206,20 +209,20 @@ c2_unparseSelf(const c2 *instance, const UState *ustate)
 }
 
 static void
-c1_initSelf(c1 *instance)
+ex_int32_initSelf(ex_int32 *instance)
 {
     instance->e1 = 0xCDCDCDCD;
     c2_initSelf(&instance->c2);
-    instance->_base.erd = &c1_ERD;
+    instance->_base.erd = &ex_int32_ERD;
 }
 
 static const char *
-c1_parseSelf(c1 *instance, const PState *pstate)
+ex_int32_parseSelf(ex_int32 *instance, const PState *pstate)
 {
     const char *error_msg = NULL;
     if (!error_msg)
     {
-        char   buffer[4];
+        char   buffer[sizeof(uint32_t)];

Review comment:
       same issue in this file. SHould be int32_t ?

##########
File path: daffodil-runtime2/src/main/scala/org/apache/daffodil/runtime2/generators/CodeGeneratorState.scala
##########
@@ -260,6 +272,9 @@ class CodeGeneratorState {
          |
          |// Methods to initialize, parse, and unparse infoset nodes
          |
+         |static inline uint8_t be8toh(uint8_t be8b) { return be8b; }

Review comment:
       So are these here because they aren't in the standard library for this sort of thing, yet we generate calls to a converter with this naming convention even for the 8-bit case? 

##########
File path: daffodil-runtime2/src/main/scala/org/apache/daffodil/runtime2/generators/BinaryIntegerKnownLengthCodeGenerator.scala
##########
@@ -36,50 +38,47 @@ trait BinaryIntegerKnownLengthCodeGenerator {
       val bo = e.byteOrderEv.constValue
       bo
     }
-    // For the time being this is a very limited back end.
-    // So there are numerous restrictions to enforce.
-    e.schemaDefinitionUnless(lengthInBits <= 64, "Length must be 64 bits or less, but was: %s", lengthInBits)
-    if (lengthInBits == 64 && !g.signed)
-      e.SDE("Integers of 64 bits length must be signed.")
 
     // This insures we can use regular java.io library calls.
     if (e.alignmentValueInBits.intValue() % 8 != 0)
       e.SDE("Only alignment to 8-bit (1 byte) boundaries is supported.")
 
     // The restrictions below are ones we want to eventually lift.
-    if (lengthInBits != 32)
-      e.SDE("Lengths other than 32 bits are not supported.")
-
     if (byteOrder ne ByteOrder.BigEndian)
       e.SDE("Only dfdl:byteOrder 'bigEndian' is supported.")
 
     if (e.bitOrder ne BitOrder.MostSignificantBitFirst)
       e.SDE("Only dfdl:bitOrder 'mostSignificantBitFirst' is supported.")
 
-    if (!g.signed)
-      e.SDE("Only signed integers are supported.")
-
-    val initStatement = s"    instance->$fieldName = 0xCDCDCDCD;"
+    // Start generating code snippets
+    val initialValue = lengthInBits match {
+      case 8 => "0xCD"
+      case 16 => "0xCDCD"
+      case 32 => "0xCDCDCDCD"
+      case 64 => "0xCDCDCDCDCDCDCDCD"
+      case _ => e.SDE("Lengths other than 8, 16, 32, or 64 bits are not supported.")

Review comment:
       If this is just a noise pattern so that it is not zero but visibly recognizable on dumps, just add a comment to that effect.
   
   I think this does *not* need to be the dfdl:fillByte as these integers are written atomically, all bytes; hence, the pattern will only be visible in a debugger for the time between this initialization and when the location is written with the integer value. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org