You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2008/06/02 17:07:30 UTC
svn commit: r662456 - /incubator/uima/uimacpp/trunk/scriptators/uima.i
Author: eae
Date: Mon Jun 2 08:07:29 2008
New Revision: 662456
URL: http://svn.apache.org/viewvc?rev=662456&view=rev
Log:
UIMA-1041: Fix from Jeff Sorensen (creator of the scriptators) for the Pythonnator losing the BOM (byte order mark) on Linux
Modified:
incubator/uima/uimacpp/trunk/scriptators/uima.i
Modified: incubator/uima/uimacpp/trunk/scriptators/uima.i
URL: http://svn.apache.org/viewvc/incubator/uima/uimacpp/trunk/scriptators/uima.i?rev=662456&r1=662455&r2=662456&view=diff
==============================================================================
--- incubator/uima/uimacpp/trunk/scriptators/uima.i (original)
+++ incubator/uima/uimacpp/trunk/scriptators/uima.i Mon Jun 2 08:07:29 2008
@@ -74,13 +74,18 @@
}
static bool ConvertUnicodeStringRef(const UnicodeStringRef &ref,
- PyObject **rv) {
+ PyObject **rv) {
if (sizeof(Py_UNICODE) == sizeof(UChar)) {
*rv = PyUnicode_FromUnicode((const Py_UNICODE*) ref.getBuffer(),
- ref.length());
+ ref.length());
} else {
+ // test for big-endian, preset python decoder for native order
+ // this will prevent PyUnicode_DecodeUTF16 from deleting byte order marks
+ union { long l; char c[sizeof(long)]; } u;
+ u.l = 1;
+ int byteorder = (u.c[sizeof(long) - 1] == 1) ? 1 : -1;
PyObject *r = PyUnicode_DecodeUTF16(
- (const char *) ref.getBuffer(), ref.getSizeInBytes(), 0, 0);
+ (const char *) ref.getBuffer(), ref.getSizeInBytes(), 0, &byteorder);
if (r==0) return false;
*rv = r;
}