# Copyright (C) 2005 Network Applied Communication Laboratory Co., Ltd.
#
# This file is part of Rast.
# See the file COPYING for redistribution information.
#
require "test/unit"
require "rast_test"
require "test-utility"
require "make-variable"
require "rast/database-generatable"
require "tempfile"
require "nkf"
module Rast
class FilterChainTest < Test::Unit::TestCase
include DatabaseGeneratable
# Single test entry point: runs every invoke_test_* scenario of this class
# one after another, in the order listed.  The first failing scenario aborts
# the remaining ones because they all share this one test method.
def test_invoke
  scenarios = [
    :invoke_test_simple,
    :invoke_test_nil_mime_type,
    :invoke_test_with_filename,
    :invoke_test_only_eos_bucket,
    :invoke_test_c_mime_filter,
    :invoke_test_c_mime_filter_with_charset_conversion,
    :invoke_test_c_mime_filter_with_properties,
    :invoke_test_c_text_filter,
    :invoke_test_ruby_mime_filter,
    :invoke_test_ruby_mime_filter_with_multiple_stage,
    :invoke_test_ruby_mime_filter_with_charset_conversion,
    :invoke_test_ruby_text_filter,
    :invoke_test_no_such_text_filter,
    :invoke_test_with_invalid_charset,
    :invoke_test_protect_filter_instance,
  ]
  scenarios.each do |scenario|
    __send__(scenario)
  end
end
# Feeds an in-memory string through a FilterChain with the explicit MIME
# type "text/plain", commits the document and checks that the stored text
# reads back unchanged.  Also checks that invoking a chain with the MIME
# type "unknown/mime-type" raises RastError.
def invoke_test_simple
  create_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  db_name = generate_db_name
  LocalDB.create(db_name, create_options)
  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    doc = db.create_document
    chain = FilterChain.new(doc)
    brigade = Brigade.new
    brigade.insert_tail(TransientBucket.new("本日は晴天なり"))
    brigade.insert_tail(EOSBucket.new)
    chain.invoke(brigade, "text/plain")
    doc.commit

    # The second document is never committed; the invoke below is expected
    # to raise.  The brigade from the first invoke is reused as-is.
    doc = db.create_document
    chain = FilterChain.new(doc)
    # assert_raise is the spelling used by the other scenarios in this file.
    assert_raise(RastError) do
      chain.invoke(brigade, "unknown/mime-type")
    end
  end
  LocalDB.open(db_name) do |db|
    assert_equal("本日は晴天なり", db.get_text(1))
  end
end
# Invokes the chain with a nil MIME type for two inputs: an in-memory string
# and the fixture file tests/data/filter/ja/utf-8.txt.tar.  Afterwards
# verifies the text stored for document 1 and document 2.
def invoke_test_nil_mime_type
  db_name = generate_db_name
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    # First document: plain string, no MIME type given.
    string_doc = db.create_document
    string_chain = FilterChain.new(string_doc)
    string_input = Brigade.new
    string_input.insert_tail(TransientBucket.new("本日は晴天なり"))
    string_input.insert_tail(EOSBucket.new)
    string_chain.invoke(string_input, nil)
    string_doc.commit

    # Second document: tar fixture read through a FileBucket, again with
    # no MIME type given.
    tar_doc = db.create_document
    tar_chain = FilterChain.new(tar_doc)
    tar_input = Brigade.new
    tar_path = File.join(MakeVariable::TOP_SRCDIR, "tests", "data",
                         "filter", "ja", "utf-8.txt.tar")
    File.open(tar_path) do |io|
      tar_input.insert_tail(FileBucket.new(io))
      tar_input.insert_tail(EOSBucket.new)
      tar_chain.invoke(tar_input, nil)
      tar_doc.commit
    end
  end

  LocalDB.open(db_name) do |db|
    assert_equal("本日は晴天なり", db.get_text(1))
    # Both tar members are expected to show up in the text of document 2.
    member_patterns = [
      /tar テスト これは tar ファイルの1つ目です/,
      /tar テスト これは tar ファイルの2つ目です/,
    ]
    member_patterns.each do |pattern|
      assert_match(pattern, db.get_text(2))
    end
  end
end
# Registers three office-document fixtures, passing nil as the MIME type and
# the file path as the third argument of FilterChain#invoke, then checks the
# text stored for each of the three documents.
def invoke_test_with_filename
  db_name = generate_db_name
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  fixture_names = ["open-office-org.sxw", "excel.xls", "powerpoint.ppt"]
  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    fixture_names.each do |fixture_name|
      path = File.join(MakeVariable::TOP_SRCDIR, "tests", "data",
                       "filter", "ja", fixture_name)
      File.open(path) do |io|
        doc = db.create_document
        chain = FilterChain.new(doc)
        input = Brigade.new
        input.insert_tail(FileBucket.new(io))
        input.insert_tail(EOSBucket.new)
        chain.invoke(input, nil, path)
        doc.commit
      end
    end
  end

  # Patterns are listed in registration order; document ids start at 1.
  expected_patterns = [
    /これはWriterファイルです/,
    /これはMS Excelファイルです/,
    /これは MS PowerPoint ファイルです/,
  ]
  LocalDB.open(db_name) do |db|
    expected_patterns.each_with_index do |pattern, index|
      assert_match(pattern, db.get_text(index + 1))
    end
  end
end
# Invokes the chain on a brigade that holds nothing but an EOSBucket (nil
# MIME type) and checks that the committed document's text is empty.
def invoke_test_only_eos_bucket
  db_name = generate_db_name
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    doc = db.create_document
    chain = FilterChain.new(doc)
    eos_only = Brigade.new
    eos_only.insert_tail(EOSBucket.new)
    chain.invoke(eos_only, nil)
    doc.commit
  end

  LocalDB.open(db_name) do |db|
    stored_text = db.get_text(1)
    assert_equal("", stored_text)
  end
end
# Runs a short string through the chain with the MIME type
# "text/x-rast-test-html" and checks the text stored for the resulting
# document.
def invoke_test_c_mime_filter
create_options = {
"encoding" => "utf8",
"preserve_text" => true,
"properties" => [],
}
db_name = generate_db_name
LocalDB.create(db_name, create_options)
LocalDB.open(db_name, Rast::DB::RDWR) do |db|
s = ""
# NOTE(review): the two literals below carry no markup, and the first one
# embeds a line break, yet the stored text is expected to be only
# "本日は晴天なり".  Presumably HTML tags were lost from these literals at
# some point -- confirm against the upstream test before relying on them.
s.concat("
天気")
s.concat("本日は晴天なり")
doc = db.create_document
chain = FilterChain.new(doc)
brigade = Brigade.new
brigade.insert_tail(TransientBucket.new(s))
brigade.insert_tail(EOSBucket.new)
chain.invoke(brigade, "text/x-rast-test-html")
doc.commit
end
# Re-open read-only to verify what was stored.
LocalDB.open(db_name, Rast::DB::RDONLY) do |db|
assert_equal("本日は晴天なり", db.get_text(1))
end
end
# Registers Shift_JIS input (declared through the MIME type's charset
# parameter) into an euc_jp database using the "text/x-rast-test-html"
# filter, then checks that both the stored text and the "title" property
# come back as the expected strings once converted from EUC-JP to UTF-8.
def invoke_test_c_mime_filter_with_charset_conversion
  db_name = generate_db_name
  title_property = {
    "name" => "title",
    "type" => Rast::PROPERTY_TYPE_STRING,
    "search" => false,
    "text_search" => false,
    "full_text_search" => false,
    "unique" => false,
  }
  db_options = {
    "encoding" => "euc_jp",
    "preserve_text" => true,
    "properties" => [title_property],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    # NOTE(review): several fragments are empty strings and none carries
    # markup; presumably HTML tags were lost from these literals -- confirm
    # against the upstream test.
    fragments = [
      "",
      "",
      "題名のエンコーディング変換テスト",
      "",
      "本文のエンコーディング変換テスト",
      "",
    ]
    utf8_source = fragments.join
    doc = db.create_document
    chain = FilterChain.new(doc)
    input = Brigade.new
    # nkf "-Ws": UTF-8 in, Shift_JIS out.
    input.insert_tail(TransientBucket.new(NKF.nkf("-Ws", utf8_source)))
    input.insert_tail(EOSBucket.new)
    chain.invoke(input, "text/x-rast-test-html; charset=Shift_JIS")
    doc.commit
  end

  LocalDB.open(db_name, Rast::DB::RDONLY) do |db|
    # nkf "-Ew": EUC-JP in, UTF-8 out; "-We": UTF-8 in, EUC-JP out.
    stored_text = NKF.nkf("-Ew", db.get_text(1))
    assert_equal("本文のエンコーディング変換テスト", stored_text)

    query = NKF.nkf("-We", "本文")
    result = db.search(query, {"properties" => ["title"]})
    stored_title = NKF.nkf("-Ew", result.items[0].properties.title)
    assert_equal("題名のエンコーディング変換テスト", stored_title)
  end
end
# Registers a document through the "text/x-rast-test-html" filter into a
# database that defines a "title" property, then searches for "本日" and
# checks the hit counts, the reported term, the document id and the title
# property value of the single hit.
def invoke_test_c_mime_filter_with_properties
  db_name = generate_db_name
  title_property = {
    "name" => "title",
    "type" => Rast::PROPERTY_TYPE_STRING,
    "search" => false,
    "text_search" => false,
    "full_text_search" => false,
    "unique" => false,
  }
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [title_property],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    # NOTE(review): neither fragment carries markup although "天気" is
    # later expected as the title property; presumably HTML tags were lost
    # from these literals -- confirm against the upstream test.
    source = ["天気", "本日は晴天なり"].join
    doc = db.create_document
    chain = FilterChain.new(doc)
    input = Brigade.new
    input.insert_tail(TransientBucket.new(source))
    input.insert_tail(EOSBucket.new)
    chain.invoke(input, "text/x-rast-test-html")
    doc.commit
  end

  LocalDB.open(db_name, Rast::DB::RDONLY) do |db|
    search_options = {"properties" => ["title"]}
    result = db.search("本日", search_options)
    assert_equal(1, result.hit_count)
    assert_equal(1, result.num_docs)
    first_term = result.terms[0]
    assert_equal("本日", first_term.term)
    assert_equal(1, first_term.doc_count)
    assert_equal(1, result.terms.length)
    first_item = result.items[0]
    assert_equal(1, first_item.doc_id)
    assert_equal("天気", first_item.properties[0])
  end
end
# Registers a small mail-like message (From/To/Subject header lines, a blank
# line, then two body lines) with a nil MIME type.  The assertions expect the
# body to become the document text and the three header values to be stored
# in the "from", "to" and "subject" properties.
def invoke_test_ruby_mime_filter
  db_name = generate_db_name
  # The three string properties differ only in their name.
  property_defs = ["from", "to", "subject"].map do |property_name|
    {
      "name" => property_name,
      "type" => Rast::PROPERTY_TYPE_STRING,
      "search" => false,
      "text_search" => false,
      "full_text_search" => false,
      "unique" => false,
    }
  end
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => property_defs,
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    message = [
      "From: rast@example.com\n",
      "To: rast-dev@example.com\n",
      "Subject: test mail\n",
      "\n",
      "本日は晴天なり\n",
      "明日は雨です\n",
    ].join
    doc = db.create_document
    chain = FilterChain.new(doc)
    input = Brigade.new
    input.insert_tail(TransientBucket.new(message))
    input.insert_tail(EOSBucket.new)
    chain.invoke(input, nil)
    doc.commit
  end

  LocalDB.open(db_name, Rast::DB::RDONLY) do |db|
    # Each body line break is expected to be stored as a single space.
    assert_equal("本日は晴天なり 明日は雨です ", db.get_text(1))

    search_options = {"properties" => ["from", "to", "subject"]}
    result = db.search("本日", search_options)
    assert_equal(1, result.hit_count)
    assert_equal(1, result.num_docs)
    first_term = result.terms[0]
    assert_equal("本日", first_term.term)
    assert_equal(1, first_term.doc_count)
    assert_equal(1, result.terms.length)
    first_item = result.items[0]
    assert_equal(1, first_item.doc_id)
    # Property values come back in the order requested in the search.
    assert_equal("rast@example.com", first_item.properties[0])
    assert_equal("rast-dev@example.com", first_item.properties[1])
    assert_equal("test mail", first_item.properties[2])
  end
end
# Registers the fixture tests/data/filter/ja/msword.doc with the MIME type
# "application/msword" and checks both the extracted text and the "author"
# and "title" properties returned by a search for "word".
def invoke_test_ruby_mime_filter_with_multiple_stage
  # The two string properties differ only in their name.
  property_defs = ["title", "author"].map do |property_name|
    {
      "name" => property_name,
      "type" => Rast::PROPERTY_TYPE_STRING,
      "search" => false,
      "text_search" => false,
      "full_text_search" => false,
      "unique" => false,
    }
  end
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => property_defs,
  }
  word_path = File.join(MakeVariable::TOP_SRCDIR, "tests", "data", "filter",
                        "ja", "msword.doc")
  db_name = generate_db_name
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    File.open(word_path) do |io|
      doc = db.create_document
      chain = FilterChain.new(doc)
      input = Brigade.new
      input.insert_tail(FileBucket.new(io))
      input.insert_tail(EOSBucket.new)
      chain.invoke(input, "application/msword")
      doc.commit
    end
  end

  LocalDB.open(db_name, Rast::DB::RDONLY) do |db|
    stored_text = db.get_text(1)
    # Every expected phrase must be delimited by whitespace on both sides.
    [
      /\sMicrosoft Word テスト\s/,
      /\sこれはMS Word ファイルです\s/,
      /\s1ページ目本文\s/,
      /\s2ページ目本文\s/,
    ].each do |pattern|
      assert_match(pattern, stored_text)
    end

    search_options = {
      "need_summary" => true,
      "properties" => ["author", "title"]
    }
    result = db.search("word", search_options)
    assert_equal("著者", result.items[0].properties.author)
    assert_equal("タイトル", result.items[0].properties.title)
    assert_equal(2, result.items[0].properties.length)
  end
end
# Registers a Shift_JIS encoded message (a Subject header line, a blank line
# and a body) as "message/rfc822; charset=Shift_JIS" into an euc_jp
# database, then checks that the stored text and the "subject" property
# match the original strings once converted from EUC-JP to UTF-8.
def invoke_test_ruby_mime_filter_with_charset_conversion
  db_name = generate_db_name
  subject_property = {
    "name" => "subject",
    "type" => Rast::PROPERTY_TYPE_STRING,
    "search" => false,
    "text_search" => false,
    "full_text_search" => false,
    "unique" => false,
  }
  db_options = {
    "encoding" => "euc_jp",
    "preserve_text" => true,
    "properties" => [subject_property],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    utf8_message = [
      "Subject: 題名のエンコーディング変換テスト\n",
      "\n",
      "本文のエンコーディング変換テスト",
    ].join
    doc = db.create_document
    chain = FilterChain.new(doc)
    input = Brigade.new
    # nkf "-Ws": UTF-8 in, Shift_JIS out.
    input.insert_tail(TransientBucket.new(NKF.nkf("-Ws", utf8_message)))
    input.insert_tail(EOSBucket.new)
    chain.invoke(input, "message/rfc822; charset=Shift_JIS")
    doc.commit
  end

  LocalDB.open(db_name) do |db|
    # nkf "-Ew": EUC-JP in, UTF-8 out; "-We": UTF-8 in, EUC-JP out.
    stored_text = NKF.nkf("-Ew", db.get_text(1))
    assert_equal("本文のエンコーディング変換テスト", stored_text)

    query = NKF.nkf("-We", "本文")
    result = db.search(query, {"properties" => ["subject"]})
    stored_subject = NKF.nkf("-Ew", result.items[0].properties.subject)
    assert_equal("題名のエンコーディング変換テスト", stored_subject)
  end
end
# Registers three documents through the "combine-lineend-hyphen" text filter
# and checks the text stored for each.  Per the expected values, only the
# "Pyth-\non" split is joined into one word; the other hyphens are kept.
def invoke_test_c_text_filter
  db_name = generate_db_name
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  text_filters = ["combine-lineend-hyphen"]
  inputs = [
    [" Ruby-\n.", " Pyth-\non.", " -\nPerl."].join,
    "-Scheme.",
    "Haskell-",
  ]
  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    inputs.each do |input|
      # A fresh filter-name array per call, as each call site had its own.
      register_with_text_filters(db, text_filters.dup, input)
    end
  end

  # Expected texts in registration order; document ids start at 1.
  expected_texts = [
    " Ruby- . Python. - Perl.",
    "-Scheme.",
    "Haskell-",
  ]
  LocalDB.open(db_name, Rast::DB::RDONLY) do |db|
    expected_texts.each_with_index do |expected, index|
      assert_equal(expected, db.get_text(index + 1))
    end
  end
end
# Registers mixed Japanese/English text through the
# "combine-lineend-japanese" text filter and checks the stored text.  Per
# the expected value, line breaks inside the Japanese sentences disappear,
# while the ones around the English sentence show up as spaces.
def invoke_test_ruby_text_filter
  db_name = generate_db_name
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    source = [
      "本日は\n晴天なり。\n",
      "明日は\n雨天なり。\n",
      "This is\na pen.",
    ].join
    register_with_text_filters(db, ["combine-lineend-japanese"], source)
  end

  LocalDB.open(db_name, Rast::DB::RDONLY) do |db|
    stored_text = db.get_text(1)
    assert_equal("本日は晴天なり。明日は雨天なり。 This is a pen.",
                 stored_text)
  end
end
# Checks that constructing a FilterChain with the text filter name
# "no-such-text-filter" raises RastError.
def invoke_test_no_such_text_filter
  db_name = generate_db_name
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    doc = db.create_document
    text_filters = ["no-such-text-filter"]
    assert_raise(RastError) { FilterChain.new(doc, text_filters) }
  end
end
# Checks that invoking the chain on Japanese text declared as
# "text/plain; charset=ISO-8859-1" raises RastError in an euc_jp database.
def invoke_test_with_invalid_charset
  db_name = generate_db_name
  db_options = {
    "encoding" => "euc_jp",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    doc = db.create_document
    chain = FilterChain.new(doc)
    input = Brigade.new
    text_bucket = TransientBucket.new("不正エンコーディング名テスト")
    input.insert_tail(text_bucket)
    input.insert_tail(EOSBucket.new)
    declared_type = "text/plain; charset=ISO-8859-1"
    assert_raise(RastError) { chain.invoke(input, declared_type) }
  end
end
# Registers "test test" through a chain built with the
# "rast-test-gc-start-invoke" text filter and checks that the text is stored
# unchanged.
# NOTE(review): judging by the names, this presumably guards against a
# filter instance being garbage-collected during invoke -- confirm against
# the filter's implementation.
def invoke_test_protect_filter_instance
  db_name = generate_db_name
  db_options = {
    "encoding" => "utf8",
    "preserve_text" => true,
    "properties" => [],
  }
  LocalDB.create(db_name, db_options)

  LocalDB.open(db_name, Rast::DB::RDWR) do |db|
    doc = db.create_document
    text_filters = ["rast-test-gc-start-invoke"]
    chain = FilterChain.new(doc, text_filters)
    input = Brigade.new
    input.insert_tail(TransientBucket.new("test test"))
    input.insert_tail(EOSBucket.new)
    chain.invoke(input, "text/plain; charset=UTF-8")
    doc.commit
  end

  LocalDB.open(db_name) do |db|
    stored_text = db.get_text(1)
    assert_equal("test test", stored_text)
  end
end
# Helper: creates a new document in +db+, runs the string +s+ through a
# FilterChain built with the filter names in +text_filters+ using the MIME
# type "text/plain", and commits the document.  Returns whatever
# doc.commit returns.
def register_with_text_filters(db, text_filters, s)
  document = db.create_document
  filter_chain = FilterChain.new(document, text_filters)

  # The input is the string followed by the end-of-stream marker.
  input = Brigade.new
  [TransientBucket.new(s), EOSBucket.new].each do |bucket|
    input.insert_tail(bucket)
  end

  filter_chain.invoke(input, "text/plain")
  document.commit
end
end
end