# Copyright (C) 2005 Network Applied Communication Laboratory Co., Ltd.
#
# This file is part of Rast.
# See the file COPYING for redistribution information.
#
require File.join(File.dirname(__FILE__), "read-buckets-to-file")
# Rast filter for OpenDocument Text (.odt) files.
#
# The file is treated as a zip archive: "content.xml" and "meta.xml" are
# piped out of it with the external "unzip" command. Text found in
# content.xml is passed downstream as "text/plain; charset=UTF-8"; values
# found in meta.xml are registered as properties on the filter.
class ApplicationVndOasisOpendocumentText
  SUPPORTED_VERSION = 1
  MIME_TYPE = "application/vnd.oasis.opendocument.text"
  EXTENSIONS = ["odt"]

  # Predefined XML entities (by name) and the characters they stand for.
  UNESCAPES = {
    "amp" => "&",
    "lt" => "<",
    "gt" => ">",
    "apos" => "'",
    "quot" => "\"",
  }.freeze

  # Matches "&name;" for every name in UNESCAPES; group 1 is the name.
  UNESCAPE_REGEXP = Regexp.new("&(#{UNESCAPES.keys.join('|')});")

  # FIXME(review): each pattern below is written as just a lazy capture group,
  # so it has one capture and matches the empty string. process_content_xml
  # destructures two captures from TEXT_REGEXP and process_meta_xml
  # destructures two from META_REGEXP, which suggests the XML element
  # literals that should surround the groups have been lost. Confirm against
  # the upstream source and restore them; they are left untouched here.
  TEXT_REGEXP = %r|(.*?)|
  TITLE_REGEXP = %r|(.*?)|
  AUTHOR_REGEXP = %r|(.*?)|
  META_REGEXP = %r|(.*?)|

  include ReadBucketsToFile

  private

  # Extracts text and metadata from the .odt archive at +path+.
  # Presumably called by ReadBucketsToFile once the input has been written to
  # a file -- TODO confirm against that module.
  #
  # filter::    object that receives the text brigade and the properties
  # mime_type:: not used by this filter
  # path::      file system path of the .odt archive
  def process_file(filter, mime_type, path)
    read_archive_member(path, "content.xml") do |io|
      process_content_xml(filter, io)
    end
    read_archive_member(path, "meta.xml") do |io|
      process_meta_xml(filter, io)
    end
  end

  # Runs "unzip -p path member" and yields a read-only IO on its stdout.
  #
  # The command is given to IO.popen as an array, so no shell is involved and
  # +path+ is passed to unzip verbatim whatever characters it contains
  # (requires Ruby 1.9 or later).
  def read_archive_member(path, member, &block)
    IO.popen(["unzip", "-p", path, member], "r", &block)
  end

  # Reads all of +f+, and for every TEXT_REGEXP match strips anything that
  # looks like a tag from the second capture, unescapes XML entities and
  # appends the result plus a newline to a brigade. The brigade is terminated
  # with an EOS bucket and passed to +filter+ as UTF-8 plain text.
  def process_content_xml(filter, f)
    brigade = Rast::Brigade.new
    f.read.scan(TEXT_REGEXP) do |_first, text|
      plain = unescape_xml(text.gsub(/<(.*?)>/, ""))
      brigade.insert_tail(Rast::TransientBucket.new("#{plain}\n"))
    end
    brigade.insert_tail(Rast::EOSBucket.new)
    filter.pass(brigade, "text/plain; charset=UTF-8")
  end

  # Reads all of +f+ and sets properties on +filter+: "title" for each
  # TITLE_REGEXP match, "author" for each AUTHOR_REGEXP match, and a property
  # named by the first capture for each META_REGEXP match.
  def process_meta_xml(filter, f)
    xml = f.read
    db_encoding = filter.db_encoding
    xml.scan(TITLE_REGEXP) do |title,|
      set_converted_property(filter, db_encoding, "title", title)
    end
    xml.scan(AUTHOR_REGEXP) do |author,|
      set_converted_property(filter, db_encoding, "author", author)
    end
    xml.scan(META_REGEXP) do |name, value|
      set_converted_property(filter, db_encoding, name, value)
    end
  end

  # Converts +value+ from UTF-8 to +db_encoding+, unescapes XML entities in
  # the converted string and stores it on +filter+ as property +name+.
  def set_converted_property(filter, db_encoding, name, value)
    converted = Rast::EncodingConverter.convert_encoding("UTF-8", db_encoding,
                                                         value)
    filter.set_property(name, unescape_xml(converted))
  end

  # Returns +filename+ with a backslash inserted before every single quote,
  # double quote, backslash and space.
  #
  # No longer used by process_file, which avoids the shell altogether; kept
  # with unchanged behaviour in case other code calls it. Other shell
  # metacharacters are NOT escaped, so do not rely on it to build new shell
  # commands.
  def escape_shell(filename)
    filename.gsub(/[\'\"\\ ]/, "\\\\\\&")
  end

  # Returns a copy of +s+ in which every "&name;" whose name is a key of
  # UNESCAPES is replaced by the corresponding character. Replacement is done
  # in a single pass, so "&amp;lt;" becomes "&lt;" and is not unescaped again.
  def unescape_xml(s)
    s.gsub(UNESCAPE_REGEXP) { UNESCAPES[Regexp.last_match(1)] }
  end
end