You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@corinthia.apache.org by gb...@apache.org on 2015/04/24 15:31:29 UTC

incubator-corinthia git commit: Copy parts of src/word/lenses to src/odf/lenses/.

Repository: incubator-corinthia
Updated Branches:
  refs/heads/master ff322cb4f -> 83720d9dd


Copy parts of src/word/lenses to src/odf/lenses/.

* DocFormats/filters/odf/src/text/ODFText.c

  (ODFTextGet): Uncomment section.  Function now 'works'.

* DocFormats/filters/ooxml/CMakeLists.txt

  (set): Add new group: GroupOOXMLODFLenses.
         Add files: src/odf/lenses/ODFField.*

  (add_library): Add groups GroupOOXMLODF and GroupOOXMLODFLenses.

* DocFormats/filters/ooxml/src/odf/ODFConverter.c

  (#include): Add temporary stdio.h for easy debugging.

  (WordConverterGetConcrete): Change Error message.  Add temporary
    magic number in lieu of OFFICE_DOCUMENT not seeming to be the
    right choice.  Move commenting out below ODF_simplifyFields()
    call.

* DocFormats/filters/ooxml/src/odf/lenses/ODFField.h

  (): New file.  String replaced copy of  /word/lenses/

* DocFormats/filters/ooxml/src/odf/lenses/ODFField.c

  (): New file.  String replaced copy of  /word/lenses/


Project: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/commit/83720d9d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/tree/83720d9d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/diff/83720d9d

Branch: refs/heads/master
Commit: 83720d9ddad4d1021e3cf0b416ebd7a761771cae
Parents: ff322cb
Author: Gabriela Gibson <gb...@apache.org>
Authored: Fri Apr 24 14:31:43 2015 +0100
Committer: Gabriela Gibson <gb...@apache.org>
Committed: Fri Apr 24 14:31:43 2015 +0100

----------------------------------------------------------------------
 DocFormats/filters/odf/src/text/ODFText.c       |   4 +-
 DocFormats/filters/ooxml/CMakeLists.txt         |  10 +-
 DocFormats/filters/ooxml/src/odf/ODFConverter.c |  13 +-
 .../filters/ooxml/src/odf/lenses/ODFField.c     | 494 +++++++++++++++++++
 .../filters/ooxml/src/odf/lenses/ODFField.h     |  27 +
 5 files changed, 540 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/odf/src/text/ODFText.c
----------------------------------------------------------------------
diff --git a/DocFormats/filters/odf/src/text/ODFText.c b/DocFormats/filters/odf/src/text/ODFText.c
index 683e2d6..417aea9 100644
--- a/DocFormats/filters/odf/src/text/ODFText.c
+++ b/DocFormats/filters/odf/src/text/ODFText.c
@@ -41,7 +41,7 @@ DFDocument *ODFTextGet(DFStorage *concreteStorage, DFStorage *abstractStorage, c
     ok = 1;
     
 end:
-    /*
+
     ODFPackageRelease(odfPackage);
     if (ok) {
         return htmlDoc;
@@ -50,7 +50,7 @@ end:
         DFDocumentRelease(htmlDoc);
         return NULL;
     }
-    */
+
     return NULL;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/DocFormats/filters/ooxml/CMakeLists.txt b/DocFormats/filters/ooxml/CMakeLists.txt
index 4696309..3ba0b01 100644
--- a/DocFormats/filters/ooxml/CMakeLists.txt
+++ b/DocFormats/filters/ooxml/CMakeLists.txt
@@ -29,6 +29,13 @@ set(GroupOOXMLODF
     src/odf/ODFConverter.c
     src/odf/ODFConverter.h)
 
+###
+## group ooxml odf lenses objects
+###
+set(GroupOOXMLODFLenses
+    src/odf/lenses/ODFField.c
+    src/odf/lenses/ODFField.h)
+    
 
 ###
 ## group ooxml word objects
@@ -160,7 +167,8 @@ add_library(ooxml OBJECT
     ${GroupOOXMLWordFormatting}
     ${GroupOOXMLWordLenses}
     ${GroupOOXMLWordTests}
-    ${GroupOOXMLODF})
+    ${GroupOOXMLODF}
+    ${GroupOOXMLODFLenses})
 
 source_group(src\\common           FILES ${GroupOOXMLCommon})
 source_group(src\\word             FILES ${GroupOOXMLWord})

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/ODFConverter.c
----------------------------------------------------------------------
diff --git a/DocFormats/filters/ooxml/src/odf/ODFConverter.c b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
index c7d79e8..3a21567 100644
--- a/DocFormats/filters/ooxml/src/odf/ODFConverter.c
+++ b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
@@ -54,6 +54,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+// 1i: debugging include --- remove
+#include <stdio.h>
 /*
 static int isWhitespaceRun(DFNode *run)
 {
@@ -688,20 +690,21 @@ DFNode *WordConverterGetConcrete(WordPutData *put, DFNode *abstract)
 
 int ODFConverterGet(DFDocument *html, DFStorage *abstractStorage, ODFPackage *package, const char *idPrefix, DFError **error)
 {
-    // 1i: contentDoc is a crude guess here.
     if (package->contentDoc == NULL) {
-        DFErrorFormat(error,"document.xml not found");
+        DFErrorFormat(error,"content.xml not found");
         return 0;
     }
     
-    // 1i: asssuming that OFFIC means AOO and so the WORD_DOCUMENT equivalent is OFFICE_DOCUMENT
-    DFNode *odfDocument = DFChildWithTag(package->contentDoc->docNode,OFFICE_DOCUMENT);
+    // 1i: this line needs work on the xml tags.
+    printf("OFFICE_DOCUMENT is %d\n", OFFICE_DOCUMENT);
+    DFNode *odfDocument = DFChildWithTag(package->contentDoc->docNode,1469 /* magic number for what I found in gdb */);
     if (odfDocument == NULL) {
         DFErrorFormat(error,"odf:document not found");
         return 0;
     }
+
+    int haveFields = ODF_simplifyFields(package);
     /*
-    int haveFields = Word_simplifyFields(package);
     Word_mergeRuns(package);
 
     WordConverter *converter = WordConverterNew(html,abstractStorage,package,idPrefix);

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
----------------------------------------------------------------------
diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
new file mode 100644
index 0000000..e7abf45
--- /dev/null
+++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
@@ -0,0 +1,494 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "DFPlatform.h"
+#include "ODFField.h"
+/*
+#include "ODFLenses.h"
+#include "ODFBookmark.h"
+#include "ODFObjects.h"
+#include "ODFPackage.h"
+#include "ODFCaption.h"
+*/
+
+#include "DFDOM.h"
+#include "DFXML.h"
+#include "DFString.h"
+#include "DFArray.h"
+#include "DFCommon.h"
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode *concrete);
+
+const char **ODF_parseField(const char *str)
+{
+    size_t len = strlen(str);
+    DFArray *components = DFArrayNew((DFCopyFunction)xstrdup,free);
+
+    size_t start = 0;
+    int inString = 0;
+    for (size_t pos = 0; pos <= len; pos++) {
+        if (inString) {
+            if ((pos == len) || (str[pos] == '"')) {
+                char *comp = DFSubstring(str,start,pos);
+                DFArrayAppend(components,(char *)comp);
+                free(comp);
+                start = pos+1;
+                inString = 0;
+            }
+        }
+        else {
+            if ((pos == len) || isspace(str[pos])) {
+                if (pos > start) {
+                    char *comp = DFSubstring(str,start,pos);
+                    DFArrayAppend(components,(char *)comp);
+                    free(comp);
+                }
+                start = pos+1;
+            }
+            else if (str[pos] == '"') {
+                inString = 1;
+                start = pos+1;
+            }
+        }
+    }
+
+    const char **result = DFStringArrayFlatten(components);
+    DFArrayRelease(components);
+    return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//                                                                                                //
+//                                       DOM helper methods                                       //
+//                                                                                                //
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+typedef struct {
+    DFNode *commonAncestor;
+    DFNode *beginAncestor;
+    DFNode *endAncestor;
+} CommonAncestorInfo;
+
+static CommonAncestorInfo findCommonAncestor(DFNode *beginNode, DFNode *endNode)
+{
+    CommonAncestorInfo info = { NULL, NULL, NULL };
+    for (DFNode *beginA = beginNode; beginA != NULL; beginA = beginA->parent) {
+        for (DFNode *endA = endNode; endA != NULL; endA = endA->parent) {
+            if (beginA->parent == endA->parent) {
+                info.commonAncestor = beginA->parent;
+                info.beginAncestor = beginA;
+                info.endAncestor = endA;
+                return info;
+            }
+        }
+    }
+    return info;
+}
+
+static void removeNodes(DFNode *beginNode, DFNode *endNode)
+{
+    CommonAncestorInfo common = findCommonAncestor(beginNode,endNode);
+    assert(common.commonAncestor != NULL);
+    assert(common.beginAncestor != NULL);
+    assert(common.endAncestor != NULL);
+
+    DFNode *begin = beginNode;
+    while (begin != common.beginAncestor) {
+        DFNode *parent = begin->parent;
+        if (begin->next != NULL)
+            DFRemoveNode(begin->next);
+        else
+            begin = parent;
+    }
+
+    DFNode *end = endNode;
+    while (end != common.endAncestor) {
+        DFNode *parent = end->parent;
+        if (end->prev != NULL)
+            DFRemoveNode(end->prev);
+        else
+            end = parent;
+    }
+
+    if (common.beginAncestor != common.endAncestor) {
+        while (common.beginAncestor->next != common.endAncestor)
+            DFRemoveNode(common.beginAncestor->next);
+    }
+
+    while ((beginNode != NULL) && (beginNode->first == NULL) && (beginNode->tag != WORD_DOCUMENT)) {
+        DFNode *parent = beginNode->parent;
+        DFRemoveNode(beginNode);
+        beginNode = parent;
+    }
+
+    while ((endNode != NULL) && (endNode->first == NULL) && (endNode->tag != WORD_DOCUMENT)) {
+        DFNode *parent = endNode->parent;
+        DFRemoveNode(endNode);
+        endNode = parent;
+    }
+}
+*/
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//                                                                                                //
+//                                       ODFSimplification                                       //
+//                                                                                                //
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+typedef struct ODFSimplification {
+    DFBuffer *instrText;
+    DFNode *beginNode;
+    DFNode *endNode;
+    int haveFields;
+    int inSeparate;
+    int depth;
+} ODFSimplification;
+
+static void replaceField(ODFSimplification *simp)
+{
+    assert(simp->instrText != NULL);
+    assert(simp->beginNode != NULL);
+    assert(simp->endNode != NULL);
+
+    if ((simp->beginNode->parent->tag == WORD_R) && (simp->endNode->parent->tag == WORD_R)) {
+        DFNode *beginRun = simp->beginNode->parent;
+
+        DFNode *simple = DFCreateElement(simp->beginNode->doc,WORD_FLDSIMPLE);
+        DFSetAttribute(simple,WORD_INSTR,simp->instrText->data);
+        DFInsertBefore(beginRun->parent,simple,beginRun);
+        // 1i: plug in later
+        //removeNodes(simp->beginNode,simp->endNode);
+    }
+
+    DFBufferRelease(simp->instrText);
+    simp->instrText = NULL;
+    simp->beginNode = NULL;
+    simp->endNode = NULL;
+
+    simp->haveFields = 1;
+}
+
+static void simplifyRecursive(ODFSimplification *simp, DFNode *node)
+{
+    switch (node->tag) {
+        case WORD_FLDCHAR: {
+            const char *type = DFGetAttribute(node,WORD_FLDCHARTYPE);
+            if (DFStringEquals(type,"begin")) {
+                if (simp->depth == 0) {
+                    DFBufferRelease(simp->instrText);
+                    simp->instrText = DFBufferNew();
+                    simp->beginNode = node;
+                    simp->endNode = NULL;
+                    simp->inSeparate = 0;
+                }
+                simp->depth++;
+            }
+            else if (DFStringEquals(type,"end") && (simp->depth > 0)) {
+                simp->depth--;
+                if (simp->depth == 0) {
+                    simp->endNode = node;
+                    replaceField(simp);
+                }
+            }
+            else if (DFStringEquals(type,"separate")) {
+                if (simp->depth == 1)
+                    simp->inSeparate = 1;
+            }
+            break;
+        }
+        case WORD_INSTRTEXT: {
+            if ((simp->depth == 1) && !simp->inSeparate) {
+                char *value = DFNodeTextToString(node);
+                DFBufferFormat(simp->instrText,"%s",value);
+                free(value);
+            }
+            break;
+        }
+    }
+
+    DFNode *next;
+    for (DFNode *child = node->first; child != NULL; child = next) {
+        next = child->next;
+        simplifyRecursive(simp,child);
+    }
+}
+
+int ODF_simplifyFields(ODFPackage *package)
+{
+    ODFSimplification simp;
+    bzero(&simp,sizeof(ODFSimplification));
+    simplifyRecursive(&simp,package->contentDoc->docNode);
+    DFBufferRelease(simp.instrText);
+    return simp.haveFields;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//                                                                                                //
+//                                          ODFFieldLens                                         //
+//                                                                                                //
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/*
+typedef enum {
+    ODFRefUnknown,
+    ODFRefNum,
+    ODFRefText,
+    ODFRefDirection,
+    ODFRefCaptionText,
+    ODFRefLabelNum,
+} ODFRefType;
+
+static const char *ODFRefTypeClassName(ODFRefType refType)
+{
+    switch (refType) {
+        case ODFRefText:
+            return DFRefTextClass;
+        case ODFRefDirection:
+            return DFRefDirectionClass;
+        case ODFRefCaptionText:
+            return DFRefCaptionTextClass;
+        case ODFRefLabelNum:
+            return DFRefLabelNumClass;
+        case ODFRefNum:
+        default:
+            return DFRefNumClass;
+    }
+}
+
+static ODFRefType ODFRefTypeGet(const char **args, ODFBookmark *bookmark)
+{
+    size_t argCount = DFStringArrayCount(args);
+    ODFRefType type = ODFRefText;
+
+    for (size_t argno = 2; argno < argCount; argno++) {
+        const char *arg = args[argno];
+        if (!strcmp(arg,"\\r"))
+            type = ODFRefNum; // Numbered reference (normal)
+        else if (!strcmp(arg,"\\n"))
+            type = ODFRefNum; // Numbered reference (no context)
+        else if (!strcmp(arg,"\\w"))
+            type = ODFRefNum; // Numbered reference (full context)
+        else if (!strcmp(arg,"\\p"))
+            type = ODFRefDirection;
+    }
+
+    if ((bookmark->type == ODFBookmarkTable) ||
+        (bookmark->type == ODFBookmarkFigure) ||
+        (bookmark->type == ODFBookmarkEquation)) {
+        if (type == ODFRefText) {
+            DFNode *p = ODFFindContainingParagraph(bookmark->element);
+            if (p != NULL) {
+                CaptionParts parts = ODFBookmarkGetCaptionParts(bookmark);
+
+                if (parts.beforeNum && !parts.num && !parts.afterNum)
+                    type = ODFRefCaptionText;
+                else if (parts.beforeNum && parts.num && !parts.afterNum)
+                    type = ODFRefLabelNum;
+            }
+        }
+    }
+
+    return type;
+}
+
+static DFNode *ODFFieldGet(ODFGetData *get, DFNode *concrete)
+{
+    if (concrete->tag != WORD_FLDSIMPLE)
+        return NULL;;
+
+    const char *instr = DFGetAttribute(concrete,WORD_INSTR);
+    if (instr != NULL) {
+        const char **args = ODF_parseField(instr);
+        size_t argCount = DFStringArrayCount(args);
+
+        if ((argCount >= 2) && !strcmp(args[0],"REF")) {
+            ODFBookmark *bookmark = ODFObjectsBookmarkWithName(get->conv->objects,args[1]);
+            if ((bookmark != NULL) && (bookmark->target != NULL)) {
+
+                ODFRefType type = ODFRefTypeGet(args,bookmark);
+
+                DFNode *a = ODFConverterCreateAbstract(get,HTML_A,concrete);
+                DFFormatAttribute(a,HTML_HREF,"#%s%u",get->conv->idPrefix,bookmark->target->seqNo);
+                DFSetAttribute(a,HTML_CLASS,ODFRefTypeClassName(type));
+
+                free(args);
+                return a;
+            }
+        }
+        else if ((argCount >= 1) && !strcmp(args[0],"TOC")) {
+
+            if ((argCount >= 2) && !strcmp(args[1],"\\o")) {
+                DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
+                DFSetAttribute(nav,HTML_CLASS,DFTableOfContentsClass);
+                free(args);
+                return nav;
+            }
+            else if ((argCount >= 3) && !strcmp(args[1],"\\c")) {
+                // FIXME: The names "Figure" and "Table" here will be different if the document
+                // was created in a language other than English. We need to look through the
+                // document to figure out which counter names are used in captions adjacent to
+                // figures and tables to know what the counter names used in the document
+                // actually are.
+
+                // Another option might be just to collect a static list of names used in all the
+                // major languages and base the detection on that. These would need to be checked
+                // with multiple versions of word, as the names used could in theory change
+                // between releases.
+
+                // We should keep track of a set of "document parameters", which record the names
+                // used for figure and table counters, as well as the prefixes used on numbered
+                // figures and tables. The latter would correspond to the content property of the
+                // caption::before and figcaption::before CSS rules.
+
+                if (!strcmp(args[2],"Figure")) {
+                    DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
+                    DFSetAttribute(nav,HTML_CLASS,DFListOfFiguresClass);
+                    free(args);
+                    return nav;
+                }
+                else if (!strcmp(args[2],"Table")) {
+                    DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
+                    DFSetAttribute(nav,HTML_CLASS,DFListOfTablesClass);
+                    free(args);
+                    return nav;
+                }
+            }
+        }
+
+        DFNode *span = ODFConverterCreateAbstract(get,HTML_SPAN,concrete);
+        DFSetAttribute(span,HTML_CLASS,DFFieldClass);
+        DFNode *text = DFCreateTextNode(get->conv->html,instr);
+        DFAppendChild(span,text);
+        free(args);
+        return span;
+    }
+    return NULL;
+}
+
+static int ODFFieldIsVisible(ODFPutData *put, DFNode *concrete)
+{
+    return 1;
+}
+
+static DFNode *ODFFieldCreate(ODFPutData *put, DFNode *abstract)
+{
+    DFNode *concrete = DFCreateElement(put->contentDoc,WORD_FLDSIMPLE);
+    // fldSimple elements are required to have an instr attribute (even if it's empty), so set
+    // it here in case update doesn't change it for some reason
+    DFSetAttribute(concrete,WORD_INSTR,"");
+    ODFFieldPut(put,abstract,concrete);
+    put->conv->haveFields = 1;
+    return concrete;
+}
+
+static const char *bookmarkNameForHtmlId(ODFConverter *converter, const char *htmlId, const char *refClass)
+{
+    DFNode *htmlElem = DFElementForIdAttr(converter->html,htmlId);
+    if (htmlElem == NULL)
+        return NULL;
+    switch (htmlElem->tag) {
+        case HTML_H1:
+        case HTML_H2:
+        case HTML_H3:
+        case HTML_H4:
+        case HTML_H5:
+        case HTML_H6: {
+            DFNode *labelSpan = htmlElem->first;
+            if ((labelSpan == NULL) || (labelSpan->tag != HTML_SPAN))
+                return NULL;;
+            const char *labelClass = DFGetAttribute(labelSpan,HTML_CLASS);
+            if (!DFStringEquals(labelClass,DFBookmarkClass))
+                return NULL;
+            return DFGetAttribute(labelSpan,WORD_NAME);
+        }
+        case HTML_FIGURE:
+        case HTML_TABLE: {
+            ODFCaption *caption = ODFObjectsCaptionForTarget(converter->objects,htmlElem);
+            if (caption == NULL)
+                return NULL;
+            if (DFStringEquals(refClass,DFRefTextClass) && (caption->textBookmark != NULL))
+                return caption->textBookmark->bookmarkName;
+            else if (DFStringEquals(refClass,DFRefLabelNumClass) && (caption->labelNumBookmark != NULL))
+                return caption->labelNumBookmark->bookmarkName;
+            else if (DFStringEquals(refClass,DFRefCaptionTextClass) && (caption->captionTextBookmark != NULL))
+                return caption->captionTextBookmark->bookmarkName;
+            else if (caption->textBookmark != NULL)
+                return caption->textBookmark->bookmarkName; // default is entire caption
+        }
+        default:
+            return NULL;
+    }
+}
+
+static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode *concrete)
+{
+    switch (abstract->tag) {
+        case HTML_SPAN: {
+            const char *className = DFGetAttribute(abstract,HTML_CLASS);
+            if (!DFStringEquals(className,DFFieldClass))
+                return;
+            char *text = DFNodeTextToString(abstract);
+            DFSetAttribute(concrete,WORD_INSTR,text);
+            free(text);
+            break;
+        }
+        case HTML_A: {
+            const char *href = DFGetAttribute(abstract,HTML_HREF);
+            if ((href == NULL) || (href[0] != '#'))
+                return;;
+
+            const char *targetId = &href[1];
+            const char *className = DFGetAttribute(abstract,HTML_CLASS);
+            if (className == NULL)
+                className = "";;
+            const char *bookmarkName = bookmarkNameForHtmlId(put->conv,targetId,className);
+            if (bookmarkName == NULL)
+                return;;
+
+            DFNode *htmlElem = DFElementForIdAttr(put->conv->html,targetId);
+            if ((htmlElem != NULL) && ((htmlElem->tag == HTML_TABLE) || (htmlElem->tag == HTML_FIGURE))) {
+                if (!DFStringEquals(className,DFRefTextClass) &&
+                    !DFStringEquals(className,DFRefLabelNumClass) &&
+                    !DFStringEquals(className,DFRefCaptionTextClass))
+                    className = DFRefTextClass;
+            }
+
+            if (DFStringEquals(className,DFRefTextClass) ||
+                DFStringEquals(className,DFRefLabelNumClass) ||
+                DFStringEquals(className,DFRefCaptionTextClass))
+                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\h ",bookmarkName);
+            else if (DFStringEquals(className,DFRefDirectionClass))
+                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\p \\h ",bookmarkName);
+            else
+                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\r \\h ",bookmarkName);
+            break;
+        }
+    }
+}
+
+ODFLens ODFFieldLens = {
+    .isVisible = ODFFieldIsVisible,
+    .get = ODFFieldGet,
+    .put = ODFFieldPut,
+    .create = ODFFieldCreate,
+    .remove = NULL, // LENS FIXME
+};
+
+*/

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
----------------------------------------------------------------------
diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
new file mode 100644
index 0000000..99e4252
--- /dev/null
+++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DocFormats_ODFField_h
+#define DocFormats_ODFField_h
+
+#include "ODFPackage.h"
+
+const char **ODF_parseField(const char *cstr);
+
+int ODF_simplifyFields(ODFPackage *package);
+
+#endif


Re: incubator-corinthia git commit: Copy parts of src/word/lenses to src/odf/lenses/.

Posted by Peter Kelly <pm...@apache.org>.
I’d like to recommend a different strategy: Rather than copying across code from the Word filter, I think it would be easier to organically “grow” the code based on implementing support for the ODF types. I noticed that some of the code copied across is data structures and algorithms that are very specific to OOXML and don’t have any corresponding meaning in ODF - fields for example work completely differently.

The same goes for ODFConverterGet; the set of steps you’d use pre- and post- main translation would be very different to those of Word. Things like collapsing bookmarks, fixing list representation etc. similarly have no analogous implementation for ODF.

Here’s what I would recommend:

- Start by doing just an ODF to HTML translation, without regard to going the other way.
- Do a traversal of the DOM tree of the content.xml file from the ODF package, producing a HTML document as you go. You could do this in either one recursive function, or several different functions (one for each type, e.g. paragraph, table, list etc).
- This will give you a handle on the basic conversion process in action, and you can produce a number of test cases as you go. When you’ve got these I can help you get them into a similar format as the automated tests for Word; but we’ll cross that bridge when we come to it.
- After this, we can look at adding node identifiers to the generated ODF elements, and then implementing the put operation.

—
Dr Peter M. Kelly
pmkelly@apache.org

PGP key: http://www.kellypmk.net/pgp-key <http://www.kellypmk.net/pgp-key>
(fingerprint 5435 6718 59F0 DD1F BFA0 5E46 2523 BAA1 44AE 2966)

> On 24 Apr 2015, at 8:31 pm, gbg@apache.org wrote:
> 
> Repository: incubator-corinthia
> Updated Branches:
>  refs/heads/master ff322cb4f -> 83720d9dd
> 
> 
> Copy parts of src/word/lenses to src/odf/lenses/.
> 
> * DocFormats/filters/odf/src/text/ODFText.c
> 
>  (ODFTextGet): Uncomment section.  Function now 'works'.
> 
> * DocFormats/filters/ooxml/CMakeLists.txt
> 
>  (set): Add new group: GroupOOXMLODFLenses.
>         Add files: src/odf/lenses/ODFField.*
> 
>  (add_library): Add groups GroupOOXMODF and GroupOOXMLODFLenses.
> 
> * DocFormats/filters/ooxml/src/odf/ODFConverter.c
> 
>  (#include): Add temporary stdio.h for easy debugging.
> 
>  (WordConverterGetConcrete): Change Error message.  Add temporary
>    magic number in lieu of OFFICE_DOCUMENT not seeming to be the
>    right choice.  Move commmenting out below ODF_simplifyFields()
>    call.
> 
> * DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> 
>  (): New file.  String replaced copy of  /word/lenses/
> 
> * DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> 
>  (): New file.  String replaced copy of  /word/lenses/
> 
> 
> Project: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/commit/83720d9d
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/tree/83720d9d
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/diff/83720d9d
> 
> Branch: refs/heads/master
> Commit: 83720d9ddad4d1021e3cf0b416ebd7a761771cae
> Parents: ff322cb
> Author: Gabriela Gibson <gb...@apache.org>
> Authored: Fri Apr 24 14:31:43 2015 +0100
> Committer: Gabriela Gibson <gb...@apache.org>
> Committed: Fri Apr 24 14:31:43 2015 +0100
> 
> ----------------------------------------------------------------------
> DocFormats/filters/odf/src/text/ODFText.c       |   4 +-
> DocFormats/filters/ooxml/CMakeLists.txt         |  10 +-
> DocFormats/filters/ooxml/src/odf/ODFConverter.c |  13 +-
> .../filters/ooxml/src/odf/lenses/ODFField.c     | 494 +++++++++++++++++++
> .../filters/ooxml/src/odf/lenses/ODFField.h     |  27 +
> 5 files changed, 540 insertions(+), 8 deletions(-)
> ----------------------------------------------------------------------
> 
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/odf/src/text/ODFText.c
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/odf/src/text/ODFText.c b/DocFormats/filters/odf/src/text/ODFText.c
> index 683e2d6..417aea9 100644
> --- a/DocFormats/filters/odf/src/text/ODFText.c
> +++ b/DocFormats/filters/odf/src/text/ODFText.c
> @@ -41,7 +41,7 @@ DFDocument *ODFTextGet(DFStorage *concreteStorage, DFStorage *abstractStorage, c
>     ok = 1;
> 
> end:
> -    /*
> +
>     ODFPackageRelease(odfPackage);
>     if (ok) {
>         return htmlDoc;
> @@ -50,7 +50,7 @@ end:
>         DFDocumentRelease(htmlDoc);
>         return NULL;
>     }
> -    */
> +
>     return NULL;
> }
> 
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/CMakeLists.txt
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/CMakeLists.txt b/DocFormats/filters/ooxml/CMakeLists.txt
> index 4696309..3ba0b01 100644
> --- a/DocFormats/filters/ooxml/CMakeLists.txt
> +++ b/DocFormats/filters/ooxml/CMakeLists.txt
> @@ -29,6 +29,13 @@ set(GroupOOXMLODF
>     src/odf/ODFConverter.c
>     src/odf/ODFConverter.h)
> 
> +###
> +## group ooxml odf lenses objects
> +###
> +set(GroupOOXMLODFLenses
> +    src/odf/lenses/ODFField.c
> +    src/odf/lenses/ODFField.h)
> +    
> 
> ###
> ## group ooxml word objects
> @@ -160,7 +167,8 @@ add_library(ooxml OBJECT
>     ${GroupOOXMLWordFormatting}
>     ${GroupOOXMLWordLenses}
>     ${GroupOOXMLWordTests}
> -    ${GroupOOXMLODF})
> +    ${GroupOOXMLODF}
> +    ${GroupOOXMLODFLenses})
> 
> source_group(src\\common           FILES ${GroupOOXMLCommon})
> source_group(src\\word             FILES ${GroupOOXMLWord})
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/src/odf/ODFConverter.c b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> index c7d79e8..3a21567 100644
> --- a/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> +++ b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> @@ -54,6 +54,8 @@
> #include <stdlib.h>
> #include <string.h>
> 
> +// 1i: debugging include --- remove
> +#include <stdio.h>
> /*
> static int isWhitespaceRun(DFNode *run)
> {
> @@ -688,20 +690,21 @@ DFNode *WordConverterGetConcrete(WordPutData *put, DFNode *abstract)
> 
> int ODFConverterGet(DFDocument *html, DFStorage *abstractStorage, ODFPackage *package, const char *idPrefix, DFError **error)
> {
> -    // 1i: contentDoc is a crude guess here.
>     if (package->contentDoc == NULL) {
> -        DFErrorFormat(error,"document.xml not found");
> +        DFErrorFormat(error,"content.xml not found");
>         return 0;
>     }
> 
> -    // 1i: asssuming that OFFIC means AOO and so the WORD_DOCUMENT equivalent is OFFICE_DOCUMENT
> -    DFNode *odfDocument = DFChildWithTag(package->contentDoc->docNode,OFFICE_DOCUMENT);
> +    // 1i: this line needs work on the xml tags.
> +    printf("OFFICE_DOCUMENT is %d\n", OFFICE_DOCUMENT);
> +    DFNode *odfDocument = DFChildWithTag(package->contentDoc->docNode,1469 /* magic number for what I found in gdb */);
>     if (odfDocument == NULL) {
>         DFErrorFormat(error,"odf:document not found");
>         return 0;
>     }
> +
> +    int haveFields = ODF_simplifyFields(package);
>     /*
> -    int haveFields = Word_simplifyFields(package);
>     Word_mergeRuns(package);
> 
>     WordConverter *converter = WordConverterNew(html,abstractStorage,package,idPrefix);
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> new file mode 100644
> index 0000000..e7abf45
> --- /dev/null
> +++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> @@ -0,0 +1,494 @@
> +// Licensed to the Apache Software Foundation (ASF) under one
> +// or more contributor license agreements.  See the NOTICE file
> +// distributed with this work for additional information
> +// regarding copyright ownership.  The ASF licenses this file
> +// to you under the Apache License, Version 2.0 (the
> +// "License"); you may not use this file except in compliance
> +// with the License.  You may obtain a copy of the License at
> +//
> +//   http://www.apache.org/licenses/LICENSE-2.0
> +//
> +// Unless required by applicable law or agreed to in writing,
> +// software distributed under the License is distributed on an
> +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> +// KIND, either express or implied.  See the License for the
> +// specific language governing permissions and limitations
> +// under the License.
> +
> +#include "DFPlatform.h"
> +#include "ODFField.h"
> +/*
> +#include "ODFLenses.h"
> +#include "ODFBookmark.h"
> +#include "ODFObjects.h"
> +#include "ODFPackage.h"
> +#include "ODFCaption.h"
> +*/
> +
> +#include "DFDOM.h"
> +#include "DFXML.h"
> +#include "DFString.h"
> +#include "DFArray.h"
> +#include "DFCommon.h"
> +#include <assert.h>
> +#include <ctype.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +/*
> +static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode *concrete);
> +
> +const char **ODF_parseField(const char *str)
> +{
> +    size_t len = strlen(str);
> +    DFArray *components = DFArrayNew((DFCopyFunction)xstrdup,free);
> +
> +    size_t start = 0;
> +    int inString = 0;
> +    for (size_t pos = 0; pos <= len; pos++) {
> +        if (inString) {
> +            if ((pos == len) || (str[pos] == '"')) {
> +                char *comp = DFSubstring(str,start,pos);
> +                DFArrayAppend(components,(char *)comp);
> +                free(comp);
> +                start = pos+1;
> +                inString = 0;
> +            }
> +        }
> +        else {
> +            if ((pos == len) || isspace(str[pos])) {
> +                if (pos > start) {
> +                    char *comp = DFSubstring(str,start,pos);
> +                    DFArrayAppend(components,(char *)comp);
> +                    free(comp);
> +                }
> +                start = pos+1;
> +            }
> +            else if (str[pos] == '"') {
> +                inString = 1;
> +                start = pos+1;
> +            }
> +        }
> +    }
> +
> +    const char **result = DFStringArrayFlatten(components);
> +    DFArrayRelease(components);
> +    return result;
> +}
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +//                                                                                                //
> +//                                       DOM helper methods                                       //
> +//                                                                                                //
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +
> +typedef struct {
> +    DFNode *commonAncestor;
> +    DFNode *beginAncestor;
> +    DFNode *endAncestor;
> +} CommonAncestorInfo;
> +
> +static CommonAncestorInfo findCommonAncestor(DFNode *beginNode, DFNode *endNode)
> +{
> +    CommonAncestorInfo info = { NULL, NULL, NULL };
> +    for (DFNode *beginA = beginNode; beginA != NULL; beginA = beginA->parent) {
> +        for (DFNode *endA = endNode; endA != NULL; endA = endA->parent) {
> +            if (beginA->parent == endA->parent) {
> +                info.commonAncestor = beginA->parent;
> +                info.beginAncestor = beginA;
> +                info.endAncestor = endA;
> +                return info;
> +            }
> +        }
> +    }
> +    return info;
> +}
> +
> +static void removeNodes(DFNode *beginNode, DFNode *endNode)
> +{
> +    CommonAncestorInfo common = findCommonAncestor(beginNode,endNode);
> +    assert(common.commonAncestor != NULL);
> +    assert(common.beginAncestor != NULL);
> +    assert(common.endAncestor != NULL);
> +
> +    DFNode *begin = beginNode;
> +    while (begin != common.beginAncestor) {
> +        DFNode *parent = begin->parent;
> +        if (begin->next != NULL)
> +            DFRemoveNode(begin->next);
> +        else
> +            begin = parent;
> +    }
> +
> +    DFNode *end = endNode;
> +    while (end != common.endAncestor) {
> +        DFNode *parent = end->parent;
> +        if (end->prev != NULL)
> +            DFRemoveNode(end->prev);
> +        else
> +            end = parent;
> +    }
> +
> +    if (common.beginAncestor != common.endAncestor) {
> +        while (common.beginAncestor->next != common.endAncestor)
> +            DFRemoveNode(common.beginAncestor->next);
> +    }
> +
> +    while ((beginNode != NULL) && (beginNode->first == NULL) && (beginNode->tag != WORD_DOCUMENT)) {
> +        DFNode *parent = beginNode->parent;
> +        DFRemoveNode(beginNode);
> +        beginNode = parent;
> +    }
> +
> +    while ((endNode != NULL) && (endNode->first == NULL) && (endNode->tag != WORD_DOCUMENT)) {
> +        DFNode *parent = endNode->parent;
> +        DFRemoveNode(endNode);
> +        endNode = parent;
> +    }
> +}
> +*/
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +//                                                                                                //
> +//                                       ODFSimplification                                       //
> +//                                                                                                //
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +
> +typedef struct ODFSimplification {
> +    DFBuffer *instrText;
> +    DFNode *beginNode;
> +    DFNode *endNode;
> +    int haveFields;
> +    int inSeparate;
> +    int depth;
> +} ODFSimplification;
> +
> +static void replaceField(ODFSimplification *simp)
> +{
> +    assert(simp->instrText != NULL);
> +    assert(simp->beginNode != NULL);
> +    assert(simp->endNode != NULL);
> +
> +    if ((simp->beginNode->parent->tag == WORD_R) && (simp->endNode->parent->tag == WORD_R)) {
> +        DFNode *beginRun = simp->beginNode->parent;
> +
> +        DFNode *simple = DFCreateElement(simp->beginNode->doc,WORD_FLDSIMPLE);
> +        DFSetAttribute(simple,WORD_INSTR,simp->instrText->data);
> +        DFInsertBefore(beginRun->parent,simple,beginRun);
> +        // 1i: plug in later
> +        //removeNodes(simp->beginNode,simp->endNode);
> +    }
> +
> +    DFBufferRelease(simp->instrText);
> +    simp->instrText = NULL;
> +    simp->beginNode = NULL;
> +    simp->endNode = NULL;
> +
> +    simp->haveFields = 1;
> +}
> +
> +static void simplifyRecursive(ODFSimplification *simp, DFNode *node)
> +{
> +    switch (node->tag) {
> +        case WORD_FLDCHAR: {
> +            const char *type = DFGetAttribute(node,WORD_FLDCHARTYPE);
> +            if (DFStringEquals(type,"begin")) {
> +                if (simp->depth == 0) {
> +                    DFBufferRelease(simp->instrText);
> +                    simp->instrText = DFBufferNew();
> +                    simp->beginNode = node;
> +                    simp->endNode = NULL;
> +                    simp->inSeparate = 0;
> +                }
> +                simp->depth++;
> +            }
> +            else if (DFStringEquals(type,"end") && (simp->depth > 0)) {
> +                simp->depth--;
> +                if (simp->depth == 0) {
> +                    simp->endNode = node;
> +                    replaceField(simp);
> +                }
> +            }
> +            else if (DFStringEquals(type,"separate")) {
> +                if (simp->depth == 1)
> +                    simp->inSeparate = 1;
> +            }
> +            break;
> +        }
> +        case WORD_INSTRTEXT: {
> +            if ((simp->depth == 1) && !simp->inSeparate) {
> +                char *value = DFNodeTextToString(node);
> +                DFBufferFormat(simp->instrText,"%s",value);
> +                free(value);
> +            }
> +            break;
> +        }
> +    }
> +
> +    DFNode *next;
> +    for (DFNode *child = node->first; child != NULL; child = next) {
> +        next = child->next;
> +        simplifyRecursive(simp,child);
> +    }
> +}
> +
> +int ODF_simplifyFields(ODFPackage *package)
> +{
> +    ODFSimplification simp;
> +    bzero(&simp,sizeof(ODFSimplification));
> +    simplifyRecursive(&simp,package->contentDoc->docNode);
> +    DFBufferRelease(simp.instrText);
> +    return simp.haveFields;
> +}
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +//                                                                                                //
> +//                                          ODFFieldLens                                         //
> +//                                                                                                //
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +/*
> +typedef enum {
> +    ODFRefUnknown,
> +    ODFRefNum,
> +    ODFRefText,
> +    ODFRefDirection,
> +    ODFRefCaptionText,
> +    ODFRefLabelNum,
> +} ODFRefType;
> +
> +static const char *ODFRefTypeClassName(ODFRefType refType)
> +{
> +    switch (refType) {
> +        case ODFRefText:
> +            return DFRefTextClass;
> +        case ODFRefDirection:
> +            return DFRefDirectionClass;
> +        case ODFRefCaptionText:
> +            return DFRefCaptionTextClass;
> +        case ODFRefLabelNum:
> +            return DFRefLabelNumClass;
> +        case ODFRefNum:
> +        default:
> +            return DFRefNumClass;
> +    }
> +}
> +
> +static ODFRefType ODFRefTypeGet(const char **args, ODFBookmark *bookmark)
> +{
> +    size_t argCount = DFStringArrayCount(args);
> +    ODFRefType type = ODFRefText;
> +
> +    for (size_t argno = 2; argno < argCount; argno++) {
> +        const char *arg = args[argno];
> +        if (!strcmp(arg,"\\r"))
> +            type = ODFRefNum; // Numbered reference (normal)
> +        else if (!strcmp(arg,"\\n"))
> +            type = ODFRefNum; // Numbered reference (no context)
> +        else if (!strcmp(arg,"\\w"))
> +            type = ODFRefNum; // Numbered reference (full context)
> +        else if (!strcmp(arg,"\\p"))
> +            type = ODFRefDirection;
> +    }
> +
> +    if ((bookmark->type == ODFBookmarkTable) ||
> +        (bookmark->type == ODFBookmarkFigure) ||
> +        (bookmark->type == ODFBookmarkEquation)) {
> +        if (type == ODFRefText) {
> +            DFNode *p = ODFFindContainingParagraph(bookmark->element);
> +            if (p != NULL) {
> +                CaptionParts parts = ODFBookmarkGetCaptionParts(bookmark);
> +
> +                if (parts.beforeNum && !parts.num && !parts.afterNum)
> +                    type = ODFRefCaptionText;
> +                else if (parts.beforeNum && parts.num && !parts.afterNum)
> +                    type = ODFRefLabelNum;
> +            }
> +        }
> +    }
> +
> +    return type;
> +}
> +
> +static DFNode *ODFFieldGet(ODFGetData *get, DFNode *concrete)
> +{
> +    if (concrete->tag != WORD_FLDSIMPLE)
> +        return NULL;;
> +
> +    const char *instr = DFGetAttribute(concrete,WORD_INSTR);
> +    if (instr != NULL) {
> +        const char **args = ODF_parseField(instr);
> +        size_t argCount = DFStringArrayCount(args);
> +
> +        if ((argCount >= 2) && !strcmp(args[0],"REF")) {
> +            ODFBookmark *bookmark = ODFObjectsBookmarkWithName(get->conv->objects,args[1]);
> +            if ((bookmark != NULL) && (bookmark->target != NULL)) {
> +
> +                ODFRefType type = ODFRefTypeGet(args,bookmark);
> +
> +                DFNode *a = ODFConverterCreateAbstract(get,HTML_A,concrete);
> +                DFFormatAttribute(a,HTML_HREF,"#%s%u",get->conv->idPrefix,bookmark->target->seqNo);
> +                DFSetAttribute(a,HTML_CLASS,ODFRefTypeClassName(type));
> +
> +                free(args);
> +                return a;
> +            }
> +        }
> +        else if ((argCount >= 1) && !strcmp(args[0],"TOC")) {
> +
> +            if ((argCount >= 2) && !strcmp(args[1],"\\o")) {
> +                DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> +                DFSetAttribute(nav,HTML_CLASS,DFTableOfContentsClass);
> +                free(args);
> +                return nav;
> +            }
> +            else if ((argCount >= 3) && !strcmp(args[1],"\\c")) {
> +                // FIXME: The names "Figure" and "Table" here will be different if the document
> +                // was created in a language other than English. We need to look through the
> +                // document to figure out which counter names are used in captions adjacent to
> +                // figures and tables to know what the counter names used in the document
> +                // actually are.
> +
> +                // Another option might be just to collect a static list of names used in all the
> +                // major languages and base the detection on that. These would need to be checked
> +                // with multiple versions of word, as the names used could in theory change
> +                // between releases.
> +
> +                // We should keep track of a set of "document parameters", which record the names
> +                // used for figure and table counters, as well as the prefixes used on numbered
> +                // figures and tables. The latter would correspond to the content property of the
> +                // caption::before and figcaption::before CSS rules.
> +
> +                if (!strcmp(args[2],"Figure")) {
> +                    DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> +                    DFSetAttribute(nav,HTML_CLASS,DFListOfFiguresClass);
> +                    free(args);
> +                    return nav;
> +                }
> +                else if (!strcmp(args[2],"Table")) {
> +                    DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> +                    DFSetAttribute(nav,HTML_CLASS,DFListOfTablesClass);
> +                    free(args);
> +                    return nav;
> +                }
> +            }
> +        }
> +
> +        DFNode *span = ODFConverterCreateAbstract(get,HTML_SPAN,concrete);
> +        DFSetAttribute(span,HTML_CLASS,DFFieldClass);
> +        DFNode *text = DFCreateTextNode(get->conv->html,instr);
> +        DFAppendChild(span,text);
> +        free(args);
> +        return span;
> +    }
> +    return NULL;
> +}
> +
> +static int ODFFieldIsVisible(ODFPutData *put, DFNode *concrete)
> +{
> +    return 1;
> +}
> +
> +static DFNode *ODFFieldCreate(ODFPutData *put, DFNode *abstract)
> +{
> +    DFNode *concrete = DFCreateElement(put->contentDoc,WORD_FLDSIMPLE);
> +    // fldSimple elements are required to have an instr attribute (even if it's empty), so set
> +    // it here in case update doesn't change it for some reason
> +    DFSetAttribute(concrete,WORD_INSTR,"");
> +    ODFFieldPut(put,abstract,concrete);
> +    put->conv->haveFields = 1;
> +    return concrete;
> +}
> +
> +static const char *bookmarkNameForHtmlId(ODFConverter *converter, const char *htmlId, const char *refClass)
> +{
> +    DFNode *htmlElem = DFElementForIdAttr(converter->html,htmlId);
> +    if (htmlElem == NULL)
> +        return NULL;
> +    switch (htmlElem->tag) {
> +        case HTML_H1:
> +        case HTML_H2:
> +        case HTML_H3:
> +        case HTML_H4:
> +        case HTML_H5:
> +        case HTML_H6: {
> +            DFNode *labelSpan = htmlElem->first;
> +            if ((labelSpan == NULL) || (labelSpan->tag != HTML_SPAN))
> +                return NULL;;
> +            const char *labelClass = DFGetAttribute(labelSpan,HTML_CLASS);
> +            if (!DFStringEquals(labelClass,DFBookmarkClass))
> +                return NULL;
> +            return DFGetAttribute(labelSpan,WORD_NAME);
> +        }
> +        case HTML_FIGURE:
> +        case HTML_TABLE: {
> +            ODFCaption *caption = ODFObjectsCaptionForTarget(converter->objects,htmlElem);
> +            if (caption == NULL)
> +                return NULL;
> +            if (DFStringEquals(refClass,DFRefTextClass) && (caption->textBookmark != NULL))
> +                return caption->textBookmark->bookmarkName;
> +            else if (DFStringEquals(refClass,DFRefLabelNumClass) && (caption->labelNumBookmark != NULL))
> +                return caption->labelNumBookmark->bookmarkName;
> +            else if (DFStringEquals(refClass,DFRefCaptionTextClass) && (caption->captionTextBookmark != NULL))
> +                return caption->captionTextBookmark->bookmarkName;
> +            else if (caption->textBookmark != NULL)
> +                return caption->textBookmark->bookmarkName; // default is entire caption
> +        }
> +        default:
> +            return NULL;
> +    }
> +}
> +
> +static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode *concrete)
> +{
> +    switch (abstract->tag) {
> +        case HTML_SPAN: {
> +            const char *className = DFGetAttribute(abstract,HTML_CLASS);
> +            if (!DFStringEquals(className,DFFieldClass))
> +                return;
> +            char *text = DFNodeTextToString(abstract);
> +            DFSetAttribute(concrete,WORD_INSTR,text);
> +            free(text);
> +            break;
> +        }
> +        case HTML_A: {
> +            const char *href = DFGetAttribute(abstract,HTML_HREF);
> +            if ((href == NULL) || (href[0] != '#'))
> +                return;;
> +
> +            const char *targetId = &href[1];
> +            const char *className = DFGetAttribute(abstract,HTML_CLASS);
> +            if (className == NULL)
> +                className = "";;
> +            const char *bookmarkName = bookmarkNameForHtmlId(put->conv,targetId,className);
> +            if (bookmarkName == NULL)
> +                return;;
> +
> +            DFNode *htmlElem = DFElementForIdAttr(put->conv->html,targetId);
> +            if ((htmlElem != NULL) && ((htmlElem->tag == HTML_TABLE) || (htmlElem->tag == HTML_FIGURE))) {
> +                if (!DFStringEquals(className,DFRefTextClass) &&
> +                    !DFStringEquals(className,DFRefLabelNumClass) &&
> +                    !DFStringEquals(className,DFRefCaptionTextClass))
> +                    className = DFRefTextClass;
> +            }
> +
> +            if (DFStringEquals(className,DFRefTextClass) ||
> +                DFStringEquals(className,DFRefLabelNumClass) ||
> +                DFStringEquals(className,DFRefCaptionTextClass))
> +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\h ",bookmarkName);
> +            else if (DFStringEquals(className,DFRefDirectionClass))
> +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\p \\h ",bookmarkName);
> +            else
> +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\r \\h ",bookmarkName);
> +            break;
> +        }
> +    }
> +}
> +
> +ODFLens ODFFieldLens = {
> +    .isVisible = ODFFieldIsVisible,
> +    .get = ODFFieldGet,
> +    .put = ODFFieldPut,
> +    .create = ODFFieldCreate,
> +    .remove = NULL, // LENS FIXME
> +};
> +
> +*/
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> new file mode 100644
> index 0000000..99e4252
> --- /dev/null
> +++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> @@ -0,0 +1,27 @@
> +// Licensed to the Apache Software Foundation (ASF) under one
> +// or more contributor license agreements.  See the NOTICE file
> +// distributed with this work for additional information
> +// regarding copyright ownership.  The ASF licenses this file
> +// to you under the Apache License, Version 2.0 (the
> +// "License"); you may not use this file except in compliance
> +// with the License.  You may obtain a copy of the License at
> +//
> +//   http://www.apache.org/licenses/LICENSE-2.0
> +//
> +// Unless required by applicable law or agreed to in writing,
> +// software distributed under the License is distributed on an
> +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> +// KIND, either express or implied.  See the License for the
> +// specific language governing permissions and limitations
> +// under the License.
> +
> +#ifndef DocFormats_ODFField_h
> +#define DocFormats_ODFField_h
> +
> +#include "ODFPackage.h"
> +
> +const char **ODF_parseField(const char *cstr);
> +
> +int ODF_simplifyFields(ODFPackage *package);
> +
> +#endif
> 


Re: incubator-corinthia git commit: Copy parts of src/word/lenses to src/odf/lenses/.

Posted by Gabriela Gibson <ga...@gmail.com>.
On Fri, Apr 24, 2015 at 6:02 PM, Peter Kelly <pm...@apache.org> wrote:

> > On 24 Apr 2015, at 11:28 pm, Gabriela Gibson <ga...@gmail.com>
> wrote:
> >
> > Ok :)
> >
> > Should I simply revert everything I have shipped and start over?
>
> That’s probably the easiest option. Tell you what, tomorrow I’ll sit down
> and write an overview of how the Word filter works on the wiki (a long
> overdue task) and fill in some of the missing structure for ODF so you have
> that in place. I realise it’s hard just looking at the big chunk of code
> and trying to figure out how to get started :)
>
> I felt like 'transformer owl'... :-)

 https://www.youtube.com/watch?v=WRXT_TrUbiw

> It's not like it's a huge amount of work lost and it still was educational
> > anyway and starting to pick out the changes probably takes longer than
> > going from scratch.
>
> This is a great point… the value of writing code often turns out to be
> educational; I think I actually had about three attempts at writing the
> word filter before I eventually came up with the structure. The same is
> true of a lot of code I write, so this is normal ;)
>
>
The latest version is always the bestests ;-D

G

> —
> Dr Peter M. Kelly
> pmkelly@apache.org
>
> PGP key: http://www.kellypmk.net/pgp-key <http://www.kellypmk.net/pgp-key>
> (fingerprint 5435 6718 59F0 DD1F BFA0 5E46 2523 BAA1 44AE 2966)
>
>


-- 
Visit my Coding Diary: http://gabriela-gibson.blogspot.com/

Re: incubator-corinthia git commit: Copy parts of src/word/lenses to src/odf/lenses/.

Posted by Peter Kelly <pm...@apache.org>.
> On 24 Apr 2015, at 11:28 pm, Gabriela Gibson <ga...@gmail.com> wrote:
> 
> Ok :)
> 
> Should I simply revert everything I have shipped and start over?

That’s probably the easiest option. Tell you what, tomorrow I’ll sit down and write an overview of how the Word filter works on the wiki (a long overdue task) and fill in some of the missing structure for ODF so you have that in place. I realise it’s hard just looking at the big chunk of code and trying to figure out how to get started :)

> It's not like it's a huge amount of work lost and it still was educational
> anyway and starting to pick out the changes probably takes longer than
> going from scratch.

This is a great point… the value of writing code often turns out to be educational; I think I actually had about three attempts at writing the word filter before I eventually came up with the structure. The same is true of a lot of code I write, so this is normal ;)

—
Dr Peter M. Kelly
pmkelly@apache.org

PGP key: http://www.kellypmk.net/pgp-key <http://www.kellypmk.net/pgp-key>
(fingerprint 5435 6718 59F0 DD1F BFA0 5E46 2523 BAA1 44AE 2966)


Re: incubator-corinthia git commit: Copy parts of src/word/lenses to src/odf/lenses/.

Posted by Gabriela Gibson <ga...@gmail.com>.
Ok :)

Should I simply revert everything I have shipped and start over?

It's not like it's a huge amount of work lost and it still was educational
anyway and starting to pick out the changes probably takes longer than
going from scratch.

G

On Fri, Apr 24, 2015 at 5:09 PM, Peter Kelly <ke...@gmail.com> wrote:

> Also, the odf directory should not go under DocFormats/filters/ooxml/src.
>
> There’s already a DocFormats/filters/odf/src directory with some files in
> it, including class definitions for ODFPackage, ODFSheet and the like.
>
> I’ll try to find some time over the weekend to help you get up with an
> appropriate structure with a bunch of TODOs in the code.
>
> --
> Dr. Peter M. Kelly
> kellypmk@gmail.com
> http://www.kellypmk.net/
>
> PGP key: http://www.kellypmk.net/pgp-key <http://www.kellypmk.net/pgp-key>
> (fingerprint 5435 6718 59F0 DD1F BFA0 5E46 2523 BAA1 44AE 2966)
>
> > On 24 Apr 2015, at 8:31 pm, gbg@apache.org wrote:
> >
> > Repository: incubator-corinthia
> > Updated Branches:
> >  refs/heads/master ff322cb4f -> 83720d9dd
> >
> >
> > Copy parts of src/word/lenses to src/odf/lenses/.
> >
> > * DocFormats/filters/odf/src/text/ODFText.c
> >
> >  (ODFTextGet): Uncomment section.  Function now 'works'.
> >
> > * DocFormats/filters/ooxml/CMakeLists.txt
> >
> >  (set): Add new group: GroupOOXMLODFLenses.
> >         Add files: src/odf/lenses/ODFField.*
> >
> >  (add_library): Add groups GroupOOXMLODF and GroupOOXMLODFLenses.
> >
> > * DocFormats/filters/ooxml/src/odf/ODFConverter.c
> >
> >  (#include): Add temporary stdio.h for easy debugging.
> >
> >  (WordConverterGetConcrete): Change Error message.  Add temporary
> >    magic number in lieu of OFFICE_DOCUMENT not seeming to be the
> >    right choice.  Move commenting out below ODF_simplifyFields()
> >    call.
> >
> > * DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> >
> >  (): New file.  String replaced copy of  /word/lenses/
> >
> > * DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> >
> >  (): New file.  String replaced copy of  /word/lenses/
> >
> >
> > Project: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/repo
> > Commit:
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/commit/83720d9d
> > Tree:
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/tree/83720d9d
> > Diff:
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/diff/83720d9d
> >
> > Branch: refs/heads/master
> > Commit: 83720d9ddad4d1021e3cf0b416ebd7a761771cae
> > Parents: ff322cb
> > Author: Gabriela Gibson <gb...@apache.org>
> > Authored: Fri Apr 24 14:31:43 2015 +0100
> > Committer: Gabriela Gibson <gb...@apache.org>
> > Committed: Fri Apr 24 14:31:43 2015 +0100
> >
> > ----------------------------------------------------------------------
> > DocFormats/filters/odf/src/text/ODFText.c       |   4 +-
> > DocFormats/filters/ooxml/CMakeLists.txt         |  10 +-
> > DocFormats/filters/ooxml/src/odf/ODFConverter.c |  13 +-
> > .../filters/ooxml/src/odf/lenses/ODFField.c     | 494 +++++++++++++++++++
> > .../filters/ooxml/src/odf/lenses/ODFField.h     |  27 +
> > 5 files changed, 540 insertions(+), 8 deletions(-)
> > ----------------------------------------------------------------------
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/odf/src/text/ODFText.c
> > ----------------------------------------------------------------------
> > diff --git a/DocFormats/filters/odf/src/text/ODFText.c
> b/DocFormats/filters/odf/src/text/ODFText.c
> > index 683e2d6..417aea9 100644
> > --- a/DocFormats/filters/odf/src/text/ODFText.c
> > +++ b/DocFormats/filters/odf/src/text/ODFText.c
> > @@ -41,7 +41,7 @@ DFDocument *ODFTextGet(DFStorage *concreteStorage,
> DFStorage *abstractStorage, c
> >     ok = 1;
> >
> > end:
> > -    /*
> > +
> >     ODFPackageRelease(odfPackage);
> >     if (ok) {
> >         return htmlDoc;
> > @@ -50,7 +50,7 @@ end:
> >         DFDocumentRelease(htmlDoc);
> >         return NULL;
> >     }
> > -    */
> > +
> >     return NULL;
> > }
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/CMakeLists.txt
> > ----------------------------------------------------------------------
> > diff --git a/DocFormats/filters/ooxml/CMakeLists.txt
> b/DocFormats/filters/ooxml/CMakeLists.txt
> > index 4696309..3ba0b01 100644
> > --- a/DocFormats/filters/ooxml/CMakeLists.txt
> > +++ b/DocFormats/filters/ooxml/CMakeLists.txt
> > @@ -29,6 +29,13 @@ set(GroupOOXMLODF
> >     src/odf/ODFConverter.c
> >     src/odf/ODFConverter.h)
> >
> > +###
> > +## group ooxml odf lenses objects
> > +###
> > +set(GroupOOXMLODFLenses
> > +    src/odf/lenses/ODFField.c
> > +    src/odf/lenses/ODFField.h)
> > +
> >
> > ###
> > ## group ooxml word objects
> > @@ -160,7 +167,8 @@ add_library(ooxml OBJECT
> >     ${GroupOOXMLWordFormatting}
> >     ${GroupOOXMLWordLenses}
> >     ${GroupOOXMLWordTests}
> > -    ${GroupOOXMLODF})
> > +    ${GroupOOXMLODF}
> > +    ${GroupOOXMLODFLenses})
> >
> > source_group(src\\common           FILES ${GroupOOXMLCommon})
> > source_group(src\\word             FILES ${GroupOOXMLWord})
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> > ----------------------------------------------------------------------
> > diff --git a/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> > index c7d79e8..3a21567 100644
> > --- a/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> > +++ b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> > @@ -54,6 +54,8 @@
> > #include <stdlib.h>
> > #include <string.h>
> >
> > +// 1i: debugging include --- remove
> > +#include <stdio.h>
> > /*
> > static int isWhitespaceRun(DFNode *run)
> > {
> > @@ -688,20 +690,21 @@ DFNode *WordConverterGetConcrete(WordPutData *put,
> DFNode *abstract)
> >
> > int ODFConverterGet(DFDocument *html, DFStorage *abstractStorage,
> ODFPackage *package, const char *idPrefix, DFError **error)
> > {
> > -    // 1i: contentDoc is a crude guess here.
> >     if (package->contentDoc == NULL) {
> > -        DFErrorFormat(error,"document.xml not found");
> > +        DFErrorFormat(error,"content.xml not found");
> >         return 0;
> >     }
> >
> > -    // 1i: asssuming that OFFIC means AOO and so the WORD_DOCUMENT
> equivalent is OFFICE_DOCUMENT
> > -    DFNode *odfDocument =
> DFChildWithTag(package->contentDoc->docNode,OFFICE_DOCUMENT);
> > +    // 1i: this line needs work on the xml tags.
> > +    printf("OFFICE_DOCUMENT is %d\n", OFFICE_DOCUMENT);
> > +    DFNode *odfDocument =
> DFChildWithTag(package->contentDoc->docNode,1469 /* magic number for what I
> found in gdb */);
> >     if (odfDocument == NULL) {
> >         DFErrorFormat(error,"odf:document not found");
> >         return 0;
> >     }
> > +
> > +    int haveFields = ODF_simplifyFields(package);
> >     /*
> > -    int haveFields = Word_simplifyFields(package);
> >     Word_mergeRuns(package);
> >
> >     WordConverter *converter =
> WordConverterNew(html,abstractStorage,package,idPrefix);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> > ----------------------------------------------------------------------
> > diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> > new file mode 100644
> > index 0000000..e7abf45
> > --- /dev/null
> > +++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> > @@ -0,0 +1,494 @@
> > +// Licensed to the Apache Software Foundation (ASF) under one
> > +// or more contributor license agreements.  See the NOTICE file
> > +// distributed with this work for additional information
> > +// regarding copyright ownership.  The ASF licenses this file
> > +// to you under the Apache License, Version 2.0 (the
> > +// "License"); you may not use this file except in compliance
> > +// with the License.  You may obtain a copy of the License at
> > +//
> > +//   http://www.apache.org/licenses/LICENSE-2.0
> > +//
> > +// Unless required by applicable law or agreed to in writing,
> > +// software distributed under the License is distributed on an
> > +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> > +// KIND, either express or implied.  See the License for the
> > +// specific language governing permissions and limitations
> > +// under the License.
> > +
> > +#include "DFPlatform.h"
> > +#include "ODFField.h"
> > +/*
> > +#include "ODFLenses.h"
> > +#include "ODFBookmark.h"
> > +#include "ODFObjects.h"
> > +#include "ODFPackage.h"
> > +#include "ODFCaption.h"
> > +*/
> > +
> > +#include "DFDOM.h"
> > +#include "DFXML.h"
> > +#include "DFString.h"
> > +#include "DFArray.h"
> > +#include "DFCommon.h"
> > +#include <assert.h>
> > +#include <ctype.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +
> > +/*
> > +static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode
> *concrete);
> > +
> > +const char **ODF_parseField(const char *str)
> > +{
> > +    size_t len = strlen(str);
> > +    DFArray *components = DFArrayNew((DFCopyFunction)xstrdup,free);
> > +
> > +    size_t start = 0;
> > +    int inString = 0;
> > +    for (size_t pos = 0; pos <= len; pos++) {
> > +        if (inString) {
> > +            if ((pos == len) || (str[pos] == '"')) {
> > +                char *comp = DFSubstring(str,start,pos);
> > +                DFArrayAppend(components,(char *)comp);
> > +                free(comp);
> > +                start = pos+1;
> > +                inString = 0;
> > +            }
> > +        }
> > +        else {
> > +            if ((pos == len) || isspace(str[pos])) {
> > +                if (pos > start) {
> > +                    char *comp = DFSubstring(str,start,pos);
> > +                    DFArrayAppend(components,(char *)comp);
> > +                    free(comp);
> > +                }
> > +                start = pos+1;
> > +            }
> > +            else if (str[pos] == '"') {
> > +                inString = 1;
> > +                start = pos+1;
> > +            }
> > +        }
> > +    }
> > +
> > +    const char **result = DFStringArrayFlatten(components);
> > +    DFArrayRelease(components);
> > +    return result;
> > +}
> > +
> >
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> > +//
>                           //
> > +//                                       DOM helper methods
>                            //
> > +//
>                           //
> >
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> > +
> > +typedef struct {
> > +    DFNode *commonAncestor;
> > +    DFNode *beginAncestor;
> > +    DFNode *endAncestor;
> > +} CommonAncestorInfo;
> > +
> > +static CommonAncestorInfo findCommonAncestor(DFNode *beginNode, DFNode
> *endNode)
> > +{
> > +    CommonAncestorInfo info = { NULL, NULL, NULL };
> > +    for (DFNode *beginA = beginNode; beginA != NULL; beginA =
> beginA->parent) {
> > +        for (DFNode *endA = endNode; endA != NULL; endA = endA->parent)
> {
> > +            if (beginA->parent == endA->parent) {
> > +                info.commonAncestor = beginA->parent;
> > +                info.beginAncestor = beginA;
> > +                info.endAncestor = endA;
> > +                return info;
> > +            }
> > +        }
> > +    }
> > +    return info;
> > +}
> > +
> > +static void removeNodes(DFNode *beginNode, DFNode *endNode)
> > +{
> > +    CommonAncestorInfo common = findCommonAncestor(beginNode,endNode);
> > +    assert(common.commonAncestor != NULL);
> > +    assert(common.beginAncestor != NULL);
> > +    assert(common.endAncestor != NULL);
> > +
> > +    DFNode *begin = beginNode;
> > +    while (begin != common.beginAncestor) {
> > +        DFNode *parent = begin->parent;
> > +        if (begin->next != NULL)
> > +            DFRemoveNode(begin->next);
> > +        else
> > +            begin = parent;
> > +    }
> > +
> > +    DFNode *end = endNode;
> > +    while (end != common.endAncestor) {
> > +        DFNode *parent = end->parent;
> > +        if (end->prev != NULL)
> > +            DFRemoveNode(end->prev);
> > +        else
> > +            end = parent;
> > +    }
> > +
> > +    if (common.beginAncestor != common.endAncestor) {
> > +        while (common.beginAncestor->next != common.endAncestor)
> > +            DFRemoveNode(common.beginAncestor->next);
> > +    }
> > +
> > +    while ((beginNode != NULL) && (beginNode->first == NULL) &&
> (beginNode->tag != WORD_DOCUMENT)) {
> > +        DFNode *parent = beginNode->parent;
> > +        DFRemoveNode(beginNode);
> > +        beginNode = parent;
> > +    }
> > +
> > +    while ((endNode != NULL) && (endNode->first == NULL) &&
> (endNode->tag != WORD_DOCUMENT)) {
> > +        DFNode *parent = endNode->parent;
> > +        DFRemoveNode(endNode);
> > +        endNode = parent;
> > +    }
> > +}
> > +*/
> >
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> > +//
>                           //
> > +//                                       ODFSimplification
>                          //
> > +//
>                           //
> >
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> > +
> > +typedef struct ODFSimplification {
> > +    DFBuffer *instrText;
> > +    DFNode *beginNode;
> > +    DFNode *endNode;
> > +    int haveFields;
> > +    int inSeparate;
> > +    int depth;
> > +} ODFSimplification;
> > +
> > +static void replaceField(ODFSimplification *simp)
> > +{
> > +    assert(simp->instrText != NULL);
> > +    assert(simp->beginNode != NULL);
> > +    assert(simp->endNode != NULL);
> > +
> > +    if ((simp->beginNode->parent->tag == WORD_R) &&
> (simp->endNode->parent->tag == WORD_R)) {
> > +        DFNode *beginRun = simp->beginNode->parent;
> > +
> > +        DFNode *simple =
> DFCreateElement(simp->beginNode->doc,WORD_FLDSIMPLE);
> > +        DFSetAttribute(simple,WORD_INSTR,simp->instrText->data);
> > +        DFInsertBefore(beginRun->parent,simple,beginRun);
> > +        // 1i: plug in later
> > +        //removeNodes(simp->beginNode,simp->endNode);
> > +    }
> > +
> > +    DFBufferRelease(simp->instrText);
> > +    simp->instrText = NULL;
> > +    simp->beginNode = NULL;
> > +    simp->endNode = NULL;
> > +
> > +    simp->haveFields = 1;
> > +}
> > +
> > +static void simplifyRecursive(ODFSimplification *simp, DFNode *node)
> > +{
> > +    switch (node->tag) {
> > +        case WORD_FLDCHAR: {
> > +            const char *type = DFGetAttribute(node,WORD_FLDCHARTYPE);
> > +            if (DFStringEquals(type,"begin")) {
> > +                if (simp->depth == 0) {
> > +                    DFBufferRelease(simp->instrText);
> > +                    simp->instrText = DFBufferNew();
> > +                    simp->beginNode = node;
> > +                    simp->endNode = NULL;
> > +                    simp->inSeparate = 0;
> > +                }
> > +                simp->depth++;
> > +            }
> > +            else if (DFStringEquals(type,"end") && (simp->depth > 0)) {
> > +                simp->depth--;
> > +                if (simp->depth == 0) {
> > +                    simp->endNode = node;
> > +                    replaceField(simp);
> > +                }
> > +            }
> > +            else if (DFStringEquals(type,"separate")) {
> > +                if (simp->depth == 1)
> > +                    simp->inSeparate = 1;
> > +            }
> > +            break;
> > +        }
> > +        case WORD_INSTRTEXT: {
> > +            if ((simp->depth == 1) && !simp->inSeparate) {
> > +                char *value = DFNodeTextToString(node);
> > +                DFBufferFormat(simp->instrText,"%s",value);
> > +                free(value);
> > +            }
> > +            break;
> > +        }
> > +    }
> > +
> > +    DFNode *next;
> > +    for (DFNode *child = node->first; child != NULL; child = next) {
> > +        next = child->next;
> > +        simplifyRecursive(simp,child);
> > +    }
> > +}
> > +
> > +int ODF_simplifyFields(ODFPackage *package)
> > +{
> > +    ODFSimplification simp;
> > +    bzero(&simp,sizeof(ODFSimplification));
> > +    simplifyRecursive(&simp,package->contentDoc->docNode);
> > +    DFBufferRelease(simp.instrText);
> > +    return simp.haveFields;
> > +}
> > +
> >
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> > +//
>                           //
> > +//                                          ODFFieldLens
>                          //
> > +//
>                           //
> >
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> > +/*
> > +typedef enum {
> > +    ODFRefUnknown,
> > +    ODFRefNum,
> > +    ODFRefText,
> > +    ODFRefDirection,
> > +    ODFRefCaptionText,
> > +    ODFRefLabelNum,
> > +} ODFRefType;
> > +
> > +static const char *ODFRefTypeClassName(ODFRefType refType)
> > +{
> > +    switch (refType) {
> > +        case ODFRefText:
> > +            return DFRefTextClass;
> > +        case ODFRefDirection:
> > +            return DFRefDirectionClass;
> > +        case ODFRefCaptionText:
> > +            return DFRefCaptionTextClass;
> > +        case ODFRefLabelNum:
> > +            return DFRefLabelNumClass;
> > +        case ODFRefNum:
> > +        default:
> > +            return DFRefNumClass;
> > +    }
> > +}
> > +
> > +static ODFRefType ODFRefTypeGet(const char **args, ODFBookmark
> *bookmark)
> > +{
> > +    size_t argCount = DFStringArrayCount(args);
> > +    ODFRefType type = ODFRefText;
> > +
> > +    for (size_t argno = 2; argno < argCount; argno++) {
> > +        const char *arg = args[argno];
> > +        if (!strcmp(arg,"\\r"))
> > +            type = ODFRefNum; // Numbered reference (normal)
> > +        else if (!strcmp(arg,"\\n"))
> > +            type = ODFRefNum; // Numbered reference (no context)
> > +        else if (!strcmp(arg,"\\w"))
> > +            type = ODFRefNum; // Numbered reference (full context)
> > +        else if (!strcmp(arg,"\\p"))
> > +            type = ODFRefDirection;
> > +    }
> > +
> > +    if ((bookmark->type == ODFBookmarkTable) ||
> > +        (bookmark->type == ODFBookmarkFigure) ||
> > +        (bookmark->type == ODFBookmarkEquation)) {
> > +        if (type == ODFRefText) {
> > +            DFNode *p = ODFFindContainingParagraph(bookmark->element);
> > +            if (p != NULL) {
> > +                CaptionParts parts =
> ODFBookmarkGetCaptionParts(bookmark);
> > +
> > +                if (parts.beforeNum && !parts.num && !parts.afterNum)
> > +                    type = ODFRefCaptionText;
> > +                else if (parts.beforeNum && parts.num &&
> !parts.afterNum)
> > +                    type = ODFRefLabelNum;
> > +            }
> > +        }
> > +    }
> > +
> > +    return type;
> > +}
> > +
> > +static DFNode *ODFFieldGet(ODFGetData *get, DFNode *concrete)
> > +{
> > +    if (concrete->tag != WORD_FLDSIMPLE)
> > +        return NULL;;
> > +
> > +    const char *instr = DFGetAttribute(concrete,WORD_INSTR);
> > +    if (instr != NULL) {
> > +        const char **args = ODF_parseField(instr);
> > +        size_t argCount = DFStringArrayCount(args);
> > +
> > +        if ((argCount >= 2) && !strcmp(args[0],"REF")) {
> > +            ODFBookmark *bookmark =
> ODFObjectsBookmarkWithName(get->conv->objects,args[1]);
> > +            if ((bookmark != NULL) && (bookmark->target != NULL)) {
> > +
> > +                ODFRefType type = ODFRefTypeGet(args,bookmark);
> > +
> > +                DFNode *a =
> ODFConverterCreateAbstract(get,HTML_A,concrete);
> > +
> DFFormatAttribute(a,HTML_HREF,"#%s%u",get->conv->idPrefix,bookmark->target->seqNo);
> > +                DFSetAttribute(a,HTML_CLASS,ODFRefTypeClassName(type));
> > +
> > +                free(args);
> > +                return a;
> > +            }
> > +        }
> > +        else if ((argCount >= 1) && !strcmp(args[0],"TOC")) {
> > +
> > +            if ((argCount >= 2) && !strcmp(args[1],"\\o")) {
> > +                DFNode *nav =
> ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> > +                DFSetAttribute(nav,HTML_CLASS,DFTableOfContentsClass);
> > +                free(args);
> > +                return nav;
> > +            }
> > +            else if ((argCount >= 3) && !strcmp(args[1],"\\c")) {
> > +                // FIXME: The names "Figure" and "Table" here will be
> different if the document
> > +                // was created in a language other than English. We
> need to look through the
> > +                // document to figure out which counter names are used
> in captions adjacent to
> > +                // figures and tables to know what the counter names
> used in the document
> > +                // actually are.
> > +
> > +                // Another option might be just to collect a static
> list of names used in all the
> > +                // major languages and base the detection on that.
> These would need to be checked
> > +                // with multiple versions of word, as the names used
> could in theory change
> > +                // between releases.
> > +
> > +                // We should keep track of a set of "document
> parameters", which record the names
> > +                // used for figure and table counters, as well as the
> prefixes used on numbered
> > +                // figures and tables. The latter would correspond to
> the content property of the
> > +                // caption::before and figcaption::before CSS rules.
> > +
> > +                if (!strcmp(args[2],"Figure")) {
> > +                    DFNode *nav =
> ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> > +                    DFSetAttribute(nav,HTML_CLASS,DFListOfFiguresClass);
> > +                    free(args);
> > +                    return nav;
> > +                }
> > +                else if (!strcmp(args[2],"Table")) {
> > +                    DFNode *nav =
> ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> > +                    DFSetAttribute(nav,HTML_CLASS,DFListOfTablesClass);
> > +                    free(args);
> > +                    return nav;
> > +                }
> > +            }
> > +        }
> > +
> > +        DFNode *span =
> ODFConverterCreateAbstract(get,HTML_SPAN,concrete);
> > +        DFSetAttribute(span,HTML_CLASS,DFFieldClass);
> > +        DFNode *text = DFCreateTextNode(get->conv->html,instr);
> > +        DFAppendChild(span,text);
> > +        free(args);
> > +        return span;
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static int ODFFieldIsVisible(ODFPutData *put, DFNode *concrete)
> > +{
> > +    return 1;
> > +}
> > +
> > +static DFNode *ODFFieldCreate(ODFPutData *put, DFNode *abstract)
> > +{
> > +    DFNode *concrete = DFCreateElement(put->contentDoc,WORD_FLDSIMPLE);
> > +    // fldSimple elements are required to have an instr attribute (even
> if it's empty), so set
> > +    // it here in case update doesn't change it for some reason
> > +    DFSetAttribute(concrete,WORD_INSTR,"");
> > +    ODFFieldPut(put,abstract,concrete);
> > +    put->conv->haveFields = 1;
> > +    return concrete;
> > +}
> > +
> > +static const char *bookmarkNameForHtmlId(ODFConverter *converter, const
> char *htmlId, const char *refClass)
> > +{
> > +    DFNode *htmlElem = DFElementForIdAttr(converter->html,htmlId);
> > +    if (htmlElem == NULL)
> > +        return NULL;
> > +    switch (htmlElem->tag) {
> > +        case HTML_H1:
> > +        case HTML_H2:
> > +        case HTML_H3:
> > +        case HTML_H4:
> > +        case HTML_H5:
> > +        case HTML_H6: {
> > +            DFNode *labelSpan = htmlElem->first;
> > +            if ((labelSpan == NULL) || (labelSpan->tag != HTML_SPAN))
> > +                return NULL;;
> > +            const char *labelClass =
> DFGetAttribute(labelSpan,HTML_CLASS);
> > +            if (!DFStringEquals(labelClass,DFBookmarkClass))
> > +                return NULL;
> > +            return DFGetAttribute(labelSpan,WORD_NAME);
> > +        }
> > +        case HTML_FIGURE:
> > +        case HTML_TABLE: {
> > +            ODFCaption *caption =
> ODFObjectsCaptionForTarget(converter->objects,htmlElem);
> > +            if (caption == NULL)
> > +                return NULL;
> > +            if (DFStringEquals(refClass,DFRefTextClass) &&
> (caption->textBookmark != NULL))
> > +                return caption->textBookmark->bookmarkName;
> > +            else if (DFStringEquals(refClass,DFRefLabelNumClass) &&
> (caption->labelNumBookmark != NULL))
> > +                return caption->labelNumBookmark->bookmarkName;
> > +            else if (DFStringEquals(refClass,DFRefCaptionTextClass) &&
> (caption->captionTextBookmark != NULL))
> > +                return caption->captionTextBookmark->bookmarkName;
> > +            else if (caption->textBookmark != NULL)
> > +                return caption->textBookmark->bookmarkName; // default
> is entire caption
> > +        }
> > +        default:
> > +            return NULL;
> > +    }
> > +}
> > +
> > +static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode
> *concrete)
> > +{
> > +    switch (abstract->tag) {
> > +        case HTML_SPAN: {
> > +            const char *className = DFGetAttribute(abstract,HTML_CLASS);
> > +            if (!DFStringEquals(className,DFFieldClass))
> > +                return;
> > +            char *text = DFNodeTextToString(abstract);
> > +            DFSetAttribute(concrete,WORD_INSTR,text);
> > +            free(text);
> > +            break;
> > +        }
> > +        case HTML_A: {
> > +            const char *href = DFGetAttribute(abstract,HTML_HREF);
> > +            if ((href == NULL) || (href[0] != '#'))
> > +                return;;
> > +
> > +            const char *targetId = &href[1];
> > +            const char *className = DFGetAttribute(abstract,HTML_CLASS);
> > +            if (className == NULL)
> > +                className = "";;
> > +            const char *bookmarkName =
> bookmarkNameForHtmlId(put->conv,targetId,className);
> > +            if (bookmarkName == NULL)
> > +                return;;
> > +
> > +            DFNode *htmlElem =
> DFElementForIdAttr(put->conv->html,targetId);
> > +            if ((htmlElem != NULL) && ((htmlElem->tag == HTML_TABLE) ||
> (htmlElem->tag == HTML_FIGURE))) {
> > +                if (!DFStringEquals(className,DFRefTextClass) &&
> > +                    !DFStringEquals(className,DFRefLabelNumClass) &&
> > +                    !DFStringEquals(className,DFRefCaptionTextClass))
> > +                    className = DFRefTextClass;
> > +            }
> > +
> > +            if (DFStringEquals(className,DFRefTextClass) ||
> > +                DFStringEquals(className,DFRefLabelNumClass) ||
> > +                DFStringEquals(className,DFRefCaptionTextClass))
> > +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\h
> ",bookmarkName);
> > +            else if (DFStringEquals(className,DFRefDirectionClass))
> > +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\p \\h
> ",bookmarkName);
> > +            else
> > +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\r \\h
> ",bookmarkName);
> > +            break;
> > +        }
> > +    }
> > +}
> > +
> > +ODFLens ODFFieldLens = {
> > +    .isVisible = ODFFieldIsVisible,
> > +    .get = ODFFieldGet,
> > +    .put = ODFFieldPut,
> > +    .create = ODFFieldCreate,
> > +    .remove = NULL, // LENS FIXME
> > +};
> > +
> > +*/
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> > ----------------------------------------------------------------------
> > diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> > new file mode 100644
> > index 0000000..99e4252
> > --- /dev/null
> > +++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> > @@ -0,0 +1,27 @@
> > +// Licensed to the Apache Software Foundation (ASF) under one
> > +// or more contributor license agreements.  See the NOTICE file
> > +// distributed with this work for additional information
> > +// regarding copyright ownership.  The ASF licenses this file
> > +// to you under the Apache License, Version 2.0 (the
> > +// "License"); you may not use this file except in compliance
> > +// with the License.  You may obtain a copy of the License at
> > +//
> > +//   http://www.apache.org/licenses/LICENSE-2.0
> > +//
> > +// Unless required by applicable law or agreed to in writing,
> > +// software distributed under the License is distributed on an
> > +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> > +// KIND, either express or implied.  See the License for the
> > +// specific language governing permissions and limitations
> > +// under the License.
> > +
> > +#ifndef DocFormats_ODFField_h
> > +#define DocFormats_ODFField_h
> > +
> > +#include "ODFPackage.h"
> > +
> > +const char **ODF_parseField(const char *cstr);
> > +
> > +int ODF_simplifyFields(ODFPackage *package);
> > +
> > +#endif
> >
>
>


-- 
Visit my Coding Diary: http://gabriela-gibson.blogspot.com/

Re: incubator-corinthia git commit: Copy parts of src/word/lenses to src/odf/lenses/.

Posted by Peter Kelly <ke...@gmail.com>.
Also, the odf directory should not go under DocFormats/filters/ooxml/src.

There’s already a DocFormats/filters/odf/src directory with some files in it, including class definitions for ODFPackage, ODFSheet and the like.

I’ll try to find some time over the weekend to help you get set up with an appropriate structure with a bunch of TODOs in the code.

--
Dr. Peter M. Kelly
kellypmk@gmail.com
http://www.kellypmk.net/

PGP key: http://www.kellypmk.net/pgp-key <http://www.kellypmk.net/pgp-key>
(fingerprint 5435 6718 59F0 DD1F BFA0 5E46 2523 BAA1 44AE 2966)

> On 24 Apr 2015, at 8:31 pm, gbg@apache.org wrote:
> 
> Repository: incubator-corinthia
> Updated Branches:
>  refs/heads/master ff322cb4f -> 83720d9dd
> 
> 
> Copy parts of src/word/lenses to src/odf/lenses/.
> 
> * DocFormats/filters/odf/src/text/ODFText.c
> 
>  (ODFTextGet): Uncomment section.  Function now 'works'.
> 
> * DocFormats/filters/ooxml/CMakeLists.txt
> 
>  (set): Add new group: GroupOOXMLODFLenses.
>         Add files: src/odf/lenses/ODFField.*
> 
>  (add_library): Add groups GroupOOXMLODF and GroupOOXMLODFLenses.
> 
> * DocFormats/filters/ooxml/src/odf/ODFConverter.c
> 
>  (#include): Add temporary stdio.h for easy debugging.
> 
>  (WordConverterGetConcrete): Change Error message.  Add temporary
>    magic number in lieu of OFFICE_DOCUMENT not seeming to be the
>    right choice.  Move commenting out below ODF_simplifyFields()
>    call.
> 
> * DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> 
>  (): New file.  String replaced copy of  /word/lenses/
> 
> * DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> 
>  (): New file.  String replaced copy of  /word/lenses/
> 
> 
> Project: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/commit/83720d9d
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/tree/83720d9d
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/diff/83720d9d
> 
> Branch: refs/heads/master
> Commit: 83720d9ddad4d1021e3cf0b416ebd7a761771cae
> Parents: ff322cb
> Author: Gabriela Gibson <gb...@apache.org>
> Authored: Fri Apr 24 14:31:43 2015 +0100
> Committer: Gabriela Gibson <gb...@apache.org>
> Committed: Fri Apr 24 14:31:43 2015 +0100
> 
> ----------------------------------------------------------------------
> DocFormats/filters/odf/src/text/ODFText.c       |   4 +-
> DocFormats/filters/ooxml/CMakeLists.txt         |  10 +-
> DocFormats/filters/ooxml/src/odf/ODFConverter.c |  13 +-
> .../filters/ooxml/src/odf/lenses/ODFField.c     | 494 +++++++++++++++++++
> .../filters/ooxml/src/odf/lenses/ODFField.h     |  27 +
> 5 files changed, 540 insertions(+), 8 deletions(-)
> ----------------------------------------------------------------------
> 
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/odf/src/text/ODFText.c
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/odf/src/text/ODFText.c b/DocFormats/filters/odf/src/text/ODFText.c
> index 683e2d6..417aea9 100644
> --- a/DocFormats/filters/odf/src/text/ODFText.c
> +++ b/DocFormats/filters/odf/src/text/ODFText.c
> @@ -41,7 +41,7 @@ DFDocument *ODFTextGet(DFStorage *concreteStorage, DFStorage *abstractStorage, c
>     ok = 1;
> 
> end:
> -    /*
> +
>     ODFPackageRelease(odfPackage);
>     if (ok) {
>         return htmlDoc;
> @@ -50,7 +50,7 @@ end:
>         DFDocumentRelease(htmlDoc);
>         return NULL;
>     }
> -    */
> +
>     return NULL;
> }
> 
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/CMakeLists.txt
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/CMakeLists.txt b/DocFormats/filters/ooxml/CMakeLists.txt
> index 4696309..3ba0b01 100644
> --- a/DocFormats/filters/ooxml/CMakeLists.txt
> +++ b/DocFormats/filters/ooxml/CMakeLists.txt
> @@ -29,6 +29,13 @@ set(GroupOOXMLODF
>     src/odf/ODFConverter.c
>     src/odf/ODFConverter.h)
> 
> +###
> +## group ooxml odf lenses objects
> +###
> +set(GroupOOXMLODFLenses
> +    src/odf/lenses/ODFField.c
> +    src/odf/lenses/ODFField.h)
> +    
> 
> ###
> ## group ooxml word objects
> @@ -160,7 +167,8 @@ add_library(ooxml OBJECT
>     ${GroupOOXMLWordFormatting}
>     ${GroupOOXMLWordLenses}
>     ${GroupOOXMLWordTests}
> -    ${GroupOOXMLODF})
> +    ${GroupOOXMLODF}
> +    ${GroupOOXMLODFLenses})
> 
> source_group(src\\common           FILES ${GroupOOXMLCommon})
> source_group(src\\word             FILES ${GroupOOXMLWord})
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/src/odf/ODFConverter.c b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> index c7d79e8..3a21567 100644
> --- a/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> +++ b/DocFormats/filters/ooxml/src/odf/ODFConverter.c
> @@ -54,6 +54,8 @@
> #include <stdlib.h>
> #include <string.h>
> 
> +// 1i: debugging include --- remove
> +#include <stdio.h>
> /*
> static int isWhitespaceRun(DFNode *run)
> {
> @@ -688,20 +690,21 @@ DFNode *WordConverterGetConcrete(WordPutData *put, DFNode *abstract)
> 
> int ODFConverterGet(DFDocument *html, DFStorage *abstractStorage, ODFPackage *package, const char *idPrefix, DFError **error)
> {
> -    // 1i: contentDoc is a crude guess here.
>     if (package->contentDoc == NULL) {
> -        DFErrorFormat(error,"document.xml not found");
> +        DFErrorFormat(error,"content.xml not found");
>         return 0;
>     }
> 
> -    // 1i: asssuming that OFFIC means AOO and so the WORD_DOCUMENT equivalent is OFFICE_DOCUMENT
> -    DFNode *odfDocument = DFChildWithTag(package->contentDoc->docNode,OFFICE_DOCUMENT);
> +    // 1i: this line needs work on the xml tags.
> +    printf("OFFICE_DOCUMENT is %d\n", OFFICE_DOCUMENT);
> +    DFNode *odfDocument = DFChildWithTag(package->contentDoc->docNode,1469 /* magic number for what I found in gdb */);
>     if (odfDocument == NULL) {
>         DFErrorFormat(error,"odf:document not found");
>         return 0;
>     }
> +
> +    int haveFields = ODF_simplifyFields(package);
>     /*
> -    int haveFields = Word_simplifyFields(package);
>     Word_mergeRuns(package);
> 
>     WordConverter *converter = WordConverterNew(html,abstractStorage,package,idPrefix);
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> new file mode 100644
> index 0000000..e7abf45
> --- /dev/null
> +++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.c
> @@ -0,0 +1,494 @@
> +// Licensed to the Apache Software Foundation (ASF) under one
> +// or more contributor license agreements.  See the NOTICE file
> +// distributed with this work for additional information
> +// regarding copyright ownership.  The ASF licenses this file
> +// to you under the Apache License, Version 2.0 (the
> +// "License"); you may not use this file except in compliance
> +// with the License.  You may obtain a copy of the License at
> +//
> +//   http://www.apache.org/licenses/LICENSE-2.0
> +//
> +// Unless required by applicable law or agreed to in writing,
> +// software distributed under the License is distributed on an
> +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> +// KIND, either express or implied.  See the License for the
> +// specific language governing permissions and limitations
> +// under the License.
> +
> +#include "DFPlatform.h"
> +#include "ODFField.h"
> +/*
> +#include "ODFLenses.h"
> +#include "ODFBookmark.h"
> +#include "ODFObjects.h"
> +#include "ODFPackage.h"
> +#include "ODFCaption.h"
> +*/
> +
> +#include "DFDOM.h"
> +#include "DFXML.h"
> +#include "DFString.h"
> +#include "DFArray.h"
> +#include "DFCommon.h"
> +#include <assert.h>
> +#include <ctype.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +/*
> +static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode *concrete);
> +
> +const char **ODF_parseField(const char *str)
> +{
> +    size_t len = strlen(str);
> +    DFArray *components = DFArrayNew((DFCopyFunction)xstrdup,free);
> +
> +    size_t start = 0;
> +    int inString = 0;
> +    for (size_t pos = 0; pos <= len; pos++) {
> +        if (inString) {
> +            if ((pos == len) || (str[pos] == '"')) {
> +                char *comp = DFSubstring(str,start,pos);
> +                DFArrayAppend(components,(char *)comp);
> +                free(comp);
> +                start = pos+1;
> +                inString = 0;
> +            }
> +        }
> +        else {
> +            if ((pos == len) || isspace(str[pos])) {
> +                if (pos > start) {
> +                    char *comp = DFSubstring(str,start,pos);
> +                    DFArrayAppend(components,(char *)comp);
> +                    free(comp);
> +                }
> +                start = pos+1;
> +            }
> +            else if (str[pos] == '"') {
> +                inString = 1;
> +                start = pos+1;
> +            }
> +        }
> +    }
> +
> +    const char **result = DFStringArrayFlatten(components);
> +    DFArrayRelease(components);
> +    return result;
> +}
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +//                                                                                                //
> +//                                       DOM helper methods                                       //
> +//                                                                                                //
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +
> +typedef struct {
> +    DFNode *commonAncestor;
> +    DFNode *beginAncestor;
> +    DFNode *endAncestor;
> +} CommonAncestorInfo;
> +
> +static CommonAncestorInfo findCommonAncestor(DFNode *beginNode, DFNode *endNode)
> +{
> +    CommonAncestorInfo info = { NULL, NULL, NULL };
> +    for (DFNode *beginA = beginNode; beginA != NULL; beginA = beginA->parent) {
> +        for (DFNode *endA = endNode; endA != NULL; endA = endA->parent) {
> +            if (beginA->parent == endA->parent) {
> +                info.commonAncestor = beginA->parent;
> +                info.beginAncestor = beginA;
> +                info.endAncestor = endA;
> +                return info;
> +            }
> +        }
> +    }
> +    return info;
> +}
> +
> +static void removeNodes(DFNode *beginNode, DFNode *endNode)
> +{
> +    CommonAncestorInfo common = findCommonAncestor(beginNode,endNode);
> +    assert(common.commonAncestor != NULL);
> +    assert(common.beginAncestor != NULL);
> +    assert(common.endAncestor != NULL);
> +
> +    DFNode *begin = beginNode;
> +    while (begin != common.beginAncestor) {
> +        DFNode *parent = begin->parent;
> +        if (begin->next != NULL)
> +            DFRemoveNode(begin->next);
> +        else
> +            begin = parent;
> +    }
> +
> +    DFNode *end = endNode;
> +    while (end != common.endAncestor) {
> +        DFNode *parent = end->parent;
> +        if (end->prev != NULL)
> +            DFRemoveNode(end->prev);
> +        else
> +            end = parent;
> +    }
> +
> +    if (common.beginAncestor != common.endAncestor) {
> +        while (common.beginAncestor->next != common.endAncestor)
> +            DFRemoveNode(common.beginAncestor->next);
> +    }
> +
> +    while ((beginNode != NULL) && (beginNode->first == NULL) && (beginNode->tag != WORD_DOCUMENT)) {
> +        DFNode *parent = beginNode->parent;
> +        DFRemoveNode(beginNode);
> +        beginNode = parent;
> +    }
> +
> +    while ((endNode != NULL) && (endNode->first == NULL) && (endNode->tag != WORD_DOCUMENT)) {
> +        DFNode *parent = endNode->parent;
> +        DFRemoveNode(endNode);
> +        endNode = parent;
> +    }
> +}
> +*/
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +//                                                                                                //
> +//                                       ODFSimplification                                       //
> +//                                                                                                //
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +
> +typedef struct ODFSimplification {
> +    DFBuffer *instrText;
> +    DFNode *beginNode;
> +    DFNode *endNode;
> +    int haveFields;
> +    int inSeparate;
> +    int depth;
> +} ODFSimplification;
> +
> +static void replaceField(ODFSimplification *simp)
> +{
> +    assert(simp->instrText != NULL);
> +    assert(simp->beginNode != NULL);
> +    assert(simp->endNode != NULL);
> +
> +    if ((simp->beginNode->parent->tag == WORD_R) && (simp->endNode->parent->tag == WORD_R)) {
> +        DFNode *beginRun = simp->beginNode->parent;
> +
> +        DFNode *simple = DFCreateElement(simp->beginNode->doc,WORD_FLDSIMPLE);
> +        DFSetAttribute(simple,WORD_INSTR,simp->instrText->data);
> +        DFInsertBefore(beginRun->parent,simple,beginRun);
> +        // 1i: plug in later
> +        //removeNodes(simp->beginNode,simp->endNode);
> +    }
> +
> +    DFBufferRelease(simp->instrText);
> +    simp->instrText = NULL;
> +    simp->beginNode = NULL;
> +    simp->endNode = NULL;
> +
> +    simp->haveFields = 1;
> +}
> +
> +static void simplifyRecursive(ODFSimplification *simp, DFNode *node)
> +{
> +    switch (node->tag) {
> +        case WORD_FLDCHAR: {
> +            const char *type = DFGetAttribute(node,WORD_FLDCHARTYPE);
> +            if (DFStringEquals(type,"begin")) {
> +                if (simp->depth == 0) {
> +                    DFBufferRelease(simp->instrText);
> +                    simp->instrText = DFBufferNew();
> +                    simp->beginNode = node;
> +                    simp->endNode = NULL;
> +                    simp->inSeparate = 0;
> +                }
> +                simp->depth++;
> +            }
> +            else if (DFStringEquals(type,"end") && (simp->depth > 0)) {
> +                simp->depth--;
> +                if (simp->depth == 0) {
> +                    simp->endNode = node;
> +                    replaceField(simp);
> +                }
> +            }
> +            else if (DFStringEquals(type,"separate")) {
> +                if (simp->depth == 1)
> +                    simp->inSeparate = 1;
> +            }
> +            break;
> +        }
> +        case WORD_INSTRTEXT: {
> +            if ((simp->depth == 1) && !simp->inSeparate) {
> +                char *value = DFNodeTextToString(node);
> +                DFBufferFormat(simp->instrText,"%s",value);
> +                free(value);
> +            }
> +            break;
> +        }
> +    }
> +
> +    DFNode *next;
> +    for (DFNode *child = node->first; child != NULL; child = next) {
> +        next = child->next;
> +        simplifyRecursive(simp,child);
> +    }
> +}
> +
> +int ODF_simplifyFields(ODFPackage *package)
> +{
> +    ODFSimplification simp;
> +    bzero(&simp,sizeof(ODFSimplification));
> +    simplifyRecursive(&simp,package->contentDoc->docNode);
> +    DFBufferRelease(simp.instrText);
> +    return simp.haveFields;
> +}
> +
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +//                                                                                                //
> +//                                          ODFFieldLens                                         //
> +//                                                                                                //
> +////////////////////////////////////////////////////////////////////////////////////////////////////
> +/*
> +typedef enum {
> +    ODFRefUnknown,
> +    ODFRefNum,
> +    ODFRefText,
> +    ODFRefDirection,
> +    ODFRefCaptionText,
> +    ODFRefLabelNum,
> +} ODFRefType;
> +
> +static const char *ODFRefTypeClassName(ODFRefType refType)
> +{
> +    switch (refType) {
> +        case ODFRefText:
> +            return DFRefTextClass;
> +        case ODFRefDirection:
> +            return DFRefDirectionClass;
> +        case ODFRefCaptionText:
> +            return DFRefCaptionTextClass;
> +        case ODFRefLabelNum:
> +            return DFRefLabelNumClass;
> +        case ODFRefNum:
> +        default:
> +            return DFRefNumClass;
> +    }
> +}
> +
> +static ODFRefType ODFRefTypeGet(const char **args, ODFBookmark *bookmark)
> +{
> +    size_t argCount = DFStringArrayCount(args);
> +    ODFRefType type = ODFRefText;
> +
> +    for (size_t argno = 2; argno < argCount; argno++) {
> +        const char *arg = args[argno];
> +        if (!strcmp(arg,"\\r"))
> +            type = ODFRefNum; // Numbered reference (normal)
> +        else if (!strcmp(arg,"\\n"))
> +            type = ODFRefNum; // Numbered reference (no context)
> +        else if (!strcmp(arg,"\\w"))
> +            type = ODFRefNum; // Numbered reference (full context)
> +        else if (!strcmp(arg,"\\p"))
> +            type = ODFRefDirection;
> +    }
> +
> +    if ((bookmark->type == ODFBookmarkTable) ||
> +        (bookmark->type == ODFBookmarkFigure) ||
> +        (bookmark->type == ODFBookmarkEquation)) {
> +        if (type == ODFRefText) {
> +            DFNode *p = ODFFindContainingParagraph(bookmark->element);
> +            if (p != NULL) {
> +                CaptionParts parts = ODFBookmarkGetCaptionParts(bookmark);
> +
> +                if (parts.beforeNum && !parts.num && !parts.afterNum)
> +                    type = ODFRefCaptionText;
> +                else if (parts.beforeNum && parts.num && !parts.afterNum)
> +                    type = ODFRefLabelNum;
> +            }
> +        }
> +    }
> +
> +    return type;
> +}
> +
> +static DFNode *ODFFieldGet(ODFGetData *get, DFNode *concrete)
> +{
> +    if (concrete->tag != WORD_FLDSIMPLE)
> +        return NULL;;
> +
> +    const char *instr = DFGetAttribute(concrete,WORD_INSTR);
> +    if (instr != NULL) {
> +        const char **args = ODF_parseField(instr);
> +        size_t argCount = DFStringArrayCount(args);
> +
> +        if ((argCount >= 2) && !strcmp(args[0],"REF")) {
> +            ODFBookmark *bookmark = ODFObjectsBookmarkWithName(get->conv->objects,args[1]);
> +            if ((bookmark != NULL) && (bookmark->target != NULL)) {
> +
> +                ODFRefType type = ODFRefTypeGet(args,bookmark);
> +
> +                DFNode *a = ODFConverterCreateAbstract(get,HTML_A,concrete);
> +                DFFormatAttribute(a,HTML_HREF,"#%s%u",get->conv->idPrefix,bookmark->target->seqNo);
> +                DFSetAttribute(a,HTML_CLASS,ODFRefTypeClassName(type));
> +
> +                free(args);
> +                return a;
> +            }
> +        }
> +        else if ((argCount >= 1) && !strcmp(args[0],"TOC")) {
> +
> +            if ((argCount >= 2) && !strcmp(args[1],"\\o")) {
> +                DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> +                DFSetAttribute(nav,HTML_CLASS,DFTableOfContentsClass);
> +                free(args);
> +                return nav;
> +            }
> +            else if ((argCount >= 3) && !strcmp(args[1],"\\c")) {
> +                // FIXME: The names "Figure" and "Table" here will be different if the document
> +                // was created in a language other than English. We need to look through the
> +                // document to figure out which counter names are used in captions adjacent to
> +                // figures and tables to know what the counter names used in the document
> +                // actually are.
> +
> +                // Another option might be just to collect a static list of names used in all the
> +                // major languages and base the detection on that. These would need to be checked
> +                // with multiple versions of word, as the names used could in theory change
> +                // between releases.
> +
> +                // We should keep track of a set of "document parameters", which record the names
> +                // used for figure and table counters, as well as the prefixes used on numbered
> +                // figures and tables. The latter would correspond to the content property of the
> +                // caption::before and figcaption::before CSS rules.
> +
> +                if (!strcmp(args[2],"Figure")) {
> +                    DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> +                    DFSetAttribute(nav,HTML_CLASS,DFListOfFiguresClass);
> +                    free(args);
> +                    return nav;
> +                }
> +                else if (!strcmp(args[2],"Table")) {
> +                    DFNode *nav = ODFConverterCreateAbstract(get,HTML_NAV,concrete);
> +                    DFSetAttribute(nav,HTML_CLASS,DFListOfTablesClass);
> +                    free(args);
> +                    return nav;
> +                }
> +            }
> +        }
> +
> +        DFNode *span = ODFConverterCreateAbstract(get,HTML_SPAN,concrete);
> +        DFSetAttribute(span,HTML_CLASS,DFFieldClass);
> +        DFNode *text = DFCreateTextNode(get->conv->html,instr);
> +        DFAppendChild(span,text);
> +        free(args);
> +        return span;
> +    }
> +    return NULL;
> +}
> +
> +static int ODFFieldIsVisible(ODFPutData *put, DFNode *concrete)
> +{
> +    return 1;
> +}
> +
> +static DFNode *ODFFieldCreate(ODFPutData *put, DFNode *abstract)
> +{
> +    DFNode *concrete = DFCreateElement(put->contentDoc,WORD_FLDSIMPLE);
> +    // fldSimple elements are required to have an instr attribute (even if it's empty), so set
> +    // it here in case update doesn't change it for some reason
> +    DFSetAttribute(concrete,WORD_INSTR,"");
> +    ODFFieldPut(put,abstract,concrete);
> +    put->conv->haveFields = 1;
> +    return concrete;
> +}
> +
> +static const char *bookmarkNameForHtmlId(ODFConverter *converter, const char *htmlId, const char *refClass)
> +{
> +    DFNode *htmlElem = DFElementForIdAttr(converter->html,htmlId);
> +    if (htmlElem == NULL)
> +        return NULL;
> +    switch (htmlElem->tag) {
> +        case HTML_H1:
> +        case HTML_H2:
> +        case HTML_H3:
> +        case HTML_H4:
> +        case HTML_H5:
> +        case HTML_H6: {
> +            DFNode *labelSpan = htmlElem->first;
> +            if ((labelSpan == NULL) || (labelSpan->tag != HTML_SPAN))
> +                return NULL;;
> +            const char *labelClass = DFGetAttribute(labelSpan,HTML_CLASS);
> +            if (!DFStringEquals(labelClass,DFBookmarkClass))
> +                return NULL;
> +            return DFGetAttribute(labelSpan,WORD_NAME);
> +        }
> +        case HTML_FIGURE:
> +        case HTML_TABLE: {
> +            ODFCaption *caption = ODFObjectsCaptionForTarget(converter->objects,htmlElem);
> +            if (caption == NULL)
> +                return NULL;
> +            if (DFStringEquals(refClass,DFRefTextClass) && (caption->textBookmark != NULL))
> +                return caption->textBookmark->bookmarkName;
> +            else if (DFStringEquals(refClass,DFRefLabelNumClass) && (caption->labelNumBookmark != NULL))
> +                return caption->labelNumBookmark->bookmarkName;
> +            else if (DFStringEquals(refClass,DFRefCaptionTextClass) && (caption->captionTextBookmark != NULL))
> +                return caption->captionTextBookmark->bookmarkName;
> +            else if (caption->textBookmark != NULL)
> +                return caption->textBookmark->bookmarkName; // default is entire caption
> +        }
> +        default:
> +            return NULL;
> +    }
> +}
> +
> +static void ODFFieldPut(ODFPutData *put, DFNode *abstract, DFNode *concrete)
> +{
> +    switch (abstract->tag) {
> +        case HTML_SPAN: {
> +            const char *className = DFGetAttribute(abstract,HTML_CLASS);
> +            if (!DFStringEquals(className,DFFieldClass))
> +                return;
> +            char *text = DFNodeTextToString(abstract);
> +            DFSetAttribute(concrete,WORD_INSTR,text);
> +            free(text);
> +            break;
> +        }
> +        case HTML_A: {
> +            const char *href = DFGetAttribute(abstract,HTML_HREF);
> +            if ((href == NULL) || (href[0] != '#'))
> +                return;;
> +
> +            const char *targetId = &href[1];
> +            const char *className = DFGetAttribute(abstract,HTML_CLASS);
> +            if (className == NULL)
> +                className = "";;
> +            const char *bookmarkName = bookmarkNameForHtmlId(put->conv,targetId,className);
> +            if (bookmarkName == NULL)
> +                return;;
> +
> +            DFNode *htmlElem = DFElementForIdAttr(put->conv->html,targetId);
> +            if ((htmlElem != NULL) && ((htmlElem->tag == HTML_TABLE) || (htmlElem->tag == HTML_FIGURE))) {
> +                if (!DFStringEquals(className,DFRefTextClass) &&
> +                    !DFStringEquals(className,DFRefLabelNumClass) &&
> +                    !DFStringEquals(className,DFRefCaptionTextClass))
> +                    className = DFRefTextClass;
> +            }
> +
> +            if (DFStringEquals(className,DFRefTextClass) ||
> +                DFStringEquals(className,DFRefLabelNumClass) ||
> +                DFStringEquals(className,DFRefCaptionTextClass))
> +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\h ",bookmarkName);
> +            else if (DFStringEquals(className,DFRefDirectionClass))
> +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\p \\h ",bookmarkName);
> +            else
> +                DFFormatAttribute(concrete,WORD_INSTR," REF %s \\r \\h ",bookmarkName);
> +            break;
> +        }
> +    }
> +}
> +
> +ODFLens ODFFieldLens = {
> +    .isVisible = ODFFieldIsVisible,
> +    .get = ODFFieldGet,
> +    .put = ODFFieldPut,
> +    .create = ODFFieldCreate,
> +    .remove = NULL, // LENS FIXME
> +};
> +
> +*/
> 
> http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/83720d9d/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> ----------------------------------------------------------------------
> diff --git a/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> new file mode 100644
> index 0000000..99e4252
> --- /dev/null
> +++ b/DocFormats/filters/ooxml/src/odf/lenses/ODFField.h
> @@ -0,0 +1,27 @@
> +// Licensed to the Apache Software Foundation (ASF) under one
> +// or more contributor license agreements.  See the NOTICE file
> +// distributed with this work for additional information
> +// regarding copyright ownership.  The ASF licenses this file
> +// to you under the Apache License, Version 2.0 (the
> +// "License"); you may not use this file except in compliance
> +// with the License.  You may obtain a copy of the License at
> +//
> +//   http://www.apache.org/licenses/LICENSE-2.0
> +//
> +// Unless required by applicable law or agreed to in writing,
> +// software distributed under the License is distributed on an
> +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> +// KIND, either express or implied.  See the License for the
> +// specific language governing permissions and limitations
> +// under the License.
> +
> +#ifndef DocFormats_ODFField_h
> +#define DocFormats_ODFField_h
> +
> +#include "ODFPackage.h"
> +
> +const char **ODF_parseField(const char *cstr);
> +
> +int ODF_simplifyFields(ODFPackage *package);
> +
> +#endif
>