You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by se...@apache.org on 2021/08/02 14:43:10 UTC
[whimsy] branch master updated: Handle widgets as well
This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 6f8c47f Handle widgets as well
6f8c47f is described below
commit 6f8c47fcf580f5c606c3ea3a0159517c96929cd1
Author: Sebb <se...@apache.org>
AuthorDate: Mon Aug 2 15:43:01 2021 +0100
Handle widgets as well
---
www/secretary/iclaparser.rb | 37 ++++++++++++++++++++++++++-----------
1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/www/secretary/iclaparser.rb b/www/secretary/iclaparser.rb
index 252caf2..b35bc94 100644
--- a/www/secretary/iclaparser.rb
+++ b/www/secretary/iclaparser.rb
@@ -134,6 +134,19 @@ module ICLAParser
NAME2FIELD[pdfname.gsub(' ','').downcase] || pdfname
end
+ def self.encode(val)
+ if val.bytes[0..1] == [254,255]
+ val = val.encode('utf-8','utf-16').strip
+ else
+ begin
+ val = val.encode('utf-8').strip
+ rescue Encoding::UndefinedConversionError
+ val = val.encode('utf-8','iso-8859-1').strip
+ end
+ end
+ val.gsub("\x7F",'') # Not sure where these originate
+ end
+
# parse the PDF
def self.parse(path)
data=Hash.new
@@ -176,24 +189,26 @@ module ICLAParser
# This is a hack; should really find the font def and use that
if val
debug[key] = v.inspect
- if val.bytes[0..1] == [254,255]
- val = val.encode('utf-8','utf-16').strip
- else
- begin
- val = val.encode('utf-8').strip
- rescue Encoding::UndefinedConversionError
- val = val.encode('utf-8','iso-8859-1').strip
- end
- end
- val.gsub!("\x7F",'') # Not sure where these originate
+ val = encode(val)
if val.length > 0
- data[canon_field_name(key)] = val.gsub("\x7F",'') # Not sure where these originate
+ data[canon_field_name(key)] = val
end
metadata[:dataSource]['Form'] = true
end
end
end
+ elsif subtype == :Widget
+ key = v[:T]
+ val = v[:V].to_s # might be a symbol
+ if val
+ debug[key] = v.inspect
+ if val.length > 0
+ data[canon_field_name(key)] = val
+ end
+ end
else
+ next if [:Catalog, :Font, :FontDescriptor].include? type
+ # p [k,type,subtype,v]
end
end # objects
if freetext.size > 0