You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by se...@apache.org on 2021/08/02 14:43:10 UTC

[whimsy] branch master updated: Handle widgets as well

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f8c47f  Handle widgets as well
6f8c47f is described below

commit 6f8c47fcf580f5c606c3ea3a0159517c96929cd1
Author: Sebb <se...@apache.org>
AuthorDate: Mon Aug 2 15:43:01 2021 +0100

    Handle widgets as well
---
 www/secretary/iclaparser.rb | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/www/secretary/iclaparser.rb b/www/secretary/iclaparser.rb
index 252caf2..b35bc94 100644
--- a/www/secretary/iclaparser.rb
+++ b/www/secretary/iclaparser.rb
@@ -134,6 +134,19 @@ module ICLAParser
     NAME2FIELD[pdfname.gsub(' ','').downcase] || pdfname
   end
 
+  def self.encode(val)
+    if val.bytes[0..1] == [254,255]
+      val = val.encode('utf-8','utf-16').strip
+    else
+      begin
+        val = val.encode('utf-8').strip
+      rescue Encoding::UndefinedConversionError
+        val = val.encode('utf-8','iso-8859-1').strip
+      end
+    end
+    val.gsub("\x7F",'') # Not sure where these originate
+  end
+
   # parse the PDF
   def self.parse(path)
     data=Hash.new
@@ -176,24 +189,26 @@ module ICLAParser
               # This is a hack; should really find the font def and use that
               if val
                 debug[key] = v.inspect
-                if val.bytes[0..1] == [254,255]
-                  val = val.encode('utf-8','utf-16').strip
-                else
-                  begin
-                    val = val.encode('utf-8').strip
-                  rescue Encoding::UndefinedConversionError
-                    val = val.encode('utf-8','iso-8859-1').strip
-                  end
-                end
-                val.gsub!("\x7F",'') # Not sure where these originate
+                val = encode(val)
                 if val.length > 0
-                  data[canon_field_name(key)] = val.gsub("\x7F",'') # Not sure where these originate
+                  data[canon_field_name(key)] = val
                 end
                 metadata[:dataSource]['Form'] = true
               end
             end
           end
+        elsif subtype == :Widget
+          key = v[:T]
+          val = v[:V].to_s # might be a symbol
+          if val
+            debug[key] = v.inspect
+            if val.length > 0
+              data[canon_field_name(key)] = val
+            end
+          end
         else
+          next if [:Catalog, :Font, :FontDescriptor].include? type
+          # p [k,type,subtype,v]
         end
       end # objects
       if freetext.size > 0