# Copyright (C) 2005 Network Applied Communication Laboratory Co., Ltd.
#
# This file is part of Rast.
# See the file COPYING for redistribution information.
#

require File.join(File.dirname(__FILE__), "read-buckets-to-file")

# Filter for documents of MIME type application/vnd.sun.xml.writer
# (extension "sxw").
#
# The document at +path+ is read with the external +unzip+ command:
# the archive member "content.xml" supplies the body text and the member
# "meta.xml" supplies the document properties.  Both are scanned with
# regular expressions rather than a real XML parser.
#
# +process_file+ is the hook this class provides; it is presumably invoked
# by the ReadBucketsToFile mix-in once the input has been written to a
# file -- confirm against read-buckets-to-file.rb.
class ApplicationVndSunXmlWriter
  SUPPORTED_VERSION = 1
  MIME_TYPE = "application/vnd.sun.xml.writer"
  EXTENSIONS = ["sxw"]

  include ReadBucketsToFile

  private

  # Extracts text and properties from the archive stored at +path+.
  #
  # filter::    object receiving the extracted text (+pass+) and the
  #             properties (+set_property+).
  # mime_type:: unused here; kept for the caller's calling convention.
  # path::      file system path of the .sxw archive.
  #
  # The exit status of +unzip+ is not checked: a missing member or an
  # unreadable archive simply produces no text / no properties.
  def process_file(filter, mime_type, path)
    # The command is run through a shell (string form of IO.popen, kept
    # for compatibility with old Ruby versions), so the path must be
    # escaped before it is interpolated.
    escaped_path = escape_shell(path)
    cmd_base = ["unzip", "-p", escaped_path]
    IO.popen((cmd_base + ["content.xml"]).join(" "), "r") do |f|
      process_content_xml(filter, f)
    end
    IO.popen((cmd_base + ["meta.xml"]).join(" "), "r") do |f|
      process_meta_xml(filter, f)
    end
  end

  # Reads content.xml from +f+ and passes every TEXT_REGEXP match down
  # the filter chain as one line of UTF-8 plain text, followed by an
  # end-of-stream bucket.
  def process_content_xml(filter, f)
    next_brigade = Rast::Brigade.new
    f.read.scan(TEXT_REGEXP) do |text, |
      s = unescape_xml(text)
      next_brigade.insert_tail(Rast::TransientBucket.new(s + "\n"))
    end
    next_brigade.insert_tail(Rast::EOSBucket.new)
    filter.pass(next_brigade, "text/plain; charset=UTF-8")
  end

  # Reads meta.xml from +f+ and sets the "title" and "author" properties
  # plus one property per META_REGEXP match (named by its first capture).
  # Values are converted from UTF-8 to the database encoding and then
  # XML-unescaped.
  def process_meta_xml(filter, f)
    s = f.read
    s.scan(TITLE_REGEXP) do |title, |
      filter.set_property("title", convert_property(filter, title))
    end
    s.scan(AUTHOR_REGEXP) do |author, |
      filter.set_property("author", convert_property(filter, author))
    end
    s.scan(META_REGEXP) do |name, value|
      # The property name is used as found in the file (neither
      # converted nor unescaped), as before.
      filter.set_property(name, convert_property(filter, value))
    end
  end

  # Converts +value+ from UTF-8 to the filter's database encoding and
  # replaces the predefined XML entities in the result.
  def convert_property(filter, value)
    property = Rast::EncodingConverter.convert_encoding("UTF-8",
                                                        filter.db_encoding,
                                                        value)
    return unescape_xml(property)
  end

  # Escapes +filename+ so that a Bourne-style shell treats it as a single
  # literal word.
  #
  # Every character outside a conservative safe set is backslash-escaped
  # (the same rule as Shellwords.escape), not only quotes, backslashes and
  # spaces: characters such as ";", "$", "`", "&", "|" or "*" would
  # otherwise be interpreted by the shell.
  def escape_shell(filename)
    # An empty word must be spelled explicitly or it vanishes from the
    # command line.
    return "''" if filename.empty?
    escaped = filename.gsub(/[^A-Za-z0-9_\-.,:+\/@\n]/) { |c| "\\" + c }
    # Backslash-newline is a line continuation (i.e. removed) in the
    # shell, so newlines have to be single-quoted instead.
    return escaped.gsub(/\n/, "'\n'")
  end

  # Replaces the predefined XML entities (&amp; &lt; &gt; &apos; &quot;)
  # in +s+ by the characters they stand for.  The replacement is a single
  # pass, so "&amp;lt;" correctly becomes "&lt;".  Numeric character
  # references are left untouched.
  def unescape_xml(s)
    return s.gsub(UNESCAPE_REGEXP) do
      UNESCAPES[$1]
    end
  end

  # The five entities predefined by XML 1.0.
  UNESCAPES = {
    "amp" => "&",
    "lt" => "<",
    "gt" => ">",
    "apos" => "'",
    "quot" => "\"",
  }
  UNESCAPE_REGEXP = Regexp.new("&(" + UNESCAPES.keys.join("|") + ");")

  # NOTE(review): in the reviewed copy all four patterns below read
  # %r|(.*?)| -- the element delimiters had evidently been lost (the
  # pattern matches the empty string everywhere, and META_REGEXP needs
  # two captures for its |name, value| consumer).  They are reconstructed
  # here from the OpenOffice.org 1.x XML file format; confirm the exact
  # element choice (in particular dc:creator for "author") against the
  # upstream Rast source.

  # One paragraph of body text in content.xml.
  TEXT_REGEXP = %r|<text:p.*?>(.*?)</text:p>|
  # Document title in meta.xml.
  TITLE_REGEXP = %r|<dc:title>(.*?)</dc:title>|
  # Document creator in meta.xml.
  AUTHOR_REGEXP = %r|<dc:creator>(.*?)</dc:creator>|
  # User-defined field in meta.xml: captures its name and its value.
  META_REGEXP =
    %r|<meta:user-defined meta:name="(.*?)">(.*?)</meta:user-defined>|
end