You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@uima.apache.org by ea...@apache.org on 2008/06/02 17:07:30 UTC

svn commit: r662456 - /incubator/uima/uimacpp/trunk/scriptators/uima.i

Author: eae
Date: Mon Jun  2 08:07:29 2008
New Revision: 662456

URL: http://svn.apache.org/viewvc?rev=662456&view=rev
Log:
UIMA-1041: Fix from Jeff Sorensen (creator of the scriptators) for the Pythonnator losing the BOM on Linux

Modified:
    incubator/uima/uimacpp/trunk/scriptators/uima.i

Modified: incubator/uima/uimacpp/trunk/scriptators/uima.i
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/scriptators/uima.i?rev=662456&r1=662455&r2=662456&view=diff
==============================================================================
--- incubator/uima/uimacpp/trunk/scriptators/uima.i (original)
+++ incubator/uima/uimacpp/trunk/scriptators/uima.i Mon Jun  2 08:07:29 2008
@@ -74,13 +74,18 @@
 }
 
 static bool ConvertUnicodeStringRef(const UnicodeStringRef &ref,
-	PyObject **rv) {
+        PyObject **rv) {
   if (sizeof(Py_UNICODE) == sizeof(UChar)) {
     *rv = PyUnicode_FromUnicode((const Py_UNICODE*) ref.getBuffer(),
-	ref.length());
+        ref.length());
   } else {
+    // test for big-endian, preset python decoder for native order
+    // this will prevent PyUnicode_DecodeUTF16 from deleting byte order marks
+    union { long l; char c[sizeof(long)]; } u;
+    u.l = 1;
+    int byteorder = (u.c[sizeof(long) - 1] == 1) ? 1 : -1;
     PyObject *r = PyUnicode_DecodeUTF16(
-       (const char *) ref.getBuffer(), ref.getSizeInBytes(), 0, 0);
+       (const char *) ref.getBuffer(), ref.getSizeInBytes(), 0, &byteorder);
     if (r==0) return false;
     *rv = r;
   }