You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@thrift.apache.org by je...@apache.org on 2013/09/04 22:35:21 UTC

git commit: THRIFT-2165: Thrift Compiler fails at input files with UTF-8 BOM

Updated Branches:
  refs/heads/master 657b933f2 -> 03d4944e8


THRIFT-2165: Thrift Compiler fails at input files with UTF-8 BOM

Patch: Jens Geyer


Project: http://git-wip-us.apache.org/repos/asf/thrift/repo
Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/03d4944e
Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/03d4944e
Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/03d4944e

Branch: refs/heads/master
Commit: 03d4944e851ace50ba89a721b1e2eb5f563bb111
Parents: 657b933
Author: Jens Geyer <je...@apache.org>
Authored: Wed Sep 4 22:34:41 2013 +0200
Committer: Jens Geyer <je...@apache.org>
Committed: Wed Sep 4 22:34:41 2013 +0200

----------------------------------------------------------------------
 compiler/cpp/src/main.cc | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/thrift/blob/03d4944e/compiler/cpp/src/main.cc
----------------------------------------------------------------------
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index f751419..a2350e6 100755
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -846,6 +846,24 @@ bool validate_throws(t_struct* throws) {
 }
 
 /**
+ * Skips a leading UTF-8 BOM (bytes EF BB BF): returns true with f positioned just past it, otherwise rewinds f to its start and returns false.
+ */
+bool skip_utf8_bom(FILE* f) {
+
+  // Read the first three bytes one at a time; a mismatch at any position falls through to the rewind below
+  if( fgetc(f) == 0xEF) {
+    if( fgetc(f) == 0xBB) {
+      if( fgetc(f) == 0xBF) {
+        return true;
+      } 
+    } 
+  } 
+  
+  rewind(f); // no BOM found: undo the bytes consumed by the checks above
+  return false;
+}
+
+/**
  * Parses a program
  */
 void parse(t_program* program, t_program* parent_program) {
@@ -857,11 +875,14 @@ void parse(t_program* program, t_program* parent_program) {
   g_curpath = path;
 
   // Open the file
+  // skip UTF-8 BOM if there is one
   yyin = fopen(path.c_str(), "r");
   if (yyin == 0) {
     failure("Could not open input file: \"%s\"", path.c_str());
   }
-
+  if( skip_utf8_bom( yyin))
+    pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
+  
   // Create new scope and scan for includes
   pverbose("Scanning %s for includes\n", path.c_str());
   g_parse_mode = INCLUDES;
@@ -891,10 +912,16 @@ void parse(t_program* program, t_program* parent_program) {
   g_parent_scope = (parent_program != NULL) ? parent_program->scope() : NULL;
   g_parent_prefix = program->get_name() + ".";
   g_curpath = path;
+
+  // Open the file
+  // skip UTF-8 BOM if there is one
   yyin = fopen(path.c_str(), "r");
   if (yyin == 0) {
     failure("Could not open input file: \"%s\"", path.c_str());
   }
+  if( skip_utf8_bom( yyin))
+    pverbose("Skipped UTF-8 BOM at %s\n", path.c_str());
+  
   pverbose("Parsing %s for types\n", path.c_str());
   yylineno = 1;
   try {