# Copyright (C) 2005 Network Applied Communication Laboratory Co., Ltd.
#
# This file is part of Rast.
# See the file COPYING for redistribution information.
#
# -*- mode: Ruby; coding: euc-japan; -*-

require "test/unit"
require "rast_test"

module Rast
  class Encoding
    # Exercises the encoding object looked up under the name "mecab_euc_jp":
    # tokenization via register_tokenize, plus normalize_text and
    # normalize_chars.
    class MeCabEucJpTest < Test::Unit::TestCase
      # Looks up the encoding under test before every test method.
      def setup
        @encoding = Encoding["mecab_euc_jp"]
      end

      # Checks the [ngram, pos, complete] triples yielded by register_tokenize
      # for an ASCII sentence and for a Japanese word.
      def test_register_tokenize
        assert_register_tokens(
          "ruby is great.",
          [
            ["ruby", 0, true],
            [" ", 4, true],
            ["is", 5, true],
            [" ", 7, true],
            ["great", 8, true],
            [".", 13, true]
          ]
        )
        assert_register_tokens(
          "松江市",
          [
            ["松江", 0, true],
            ["市", 2, true]
          ]
        )
      end

      # Checks normalize_text against a table of [input, expected] pairs.
      #
      # NOTE(review): several pairs below have identical input and expected
      # strings as they appear in this file; presumably the inputs originally
      # used different character-width variants -- verify against the
      # original EUC-JP source before relying on them.
      def test_normalize_text
        cases = [
          [" abc ", " abc "],
          [" abc\nabc", " abc abc"],
          ["a\n \t b\nc\r\rd \ne ", "a b c d e "],
          ["012ABC", "012ABC"],
          ["アイウ", "アイウ"],
          ["アイウアイウ", "アイウアイウ"],
          ["タチツテト", "タチツテト"],
          ["サシスセソタチツテト", "サシスセソタチツテト"],
          ["ガギグゲゴ", "ガギグゲゴ"],
          ["ダヂヅデド", "ダヂヅデド"],
          ["パピプペポ", "パピプペポ"],
          ["ヴ", "ヴ"]
        ]
        cases.each do |input, expected|
          assert_equal(expected, @encoding.normalize_text(input))
        end
      end

      # Checks that normalize_chars maps "ABC" to "abc".
      def test_normalize_chars
        assert_equal("abc", @encoding.normalize_chars("ABC"))
      end

      private

      # Runs register_tokenize on +text+, gathering every yielded
      # [ngram, pos, complete] triple, then compares them one by one with
      # +expected+ and finally compares the number of triples.
      def assert_register_tokens(text, expected)
        tokens = []
        @encoding.register_tokenize(text) do |ngram, pos, complete|
          tokens << [ngram, pos, complete]
        end
        expected.each_with_index do |triple, index|
          assert_equal(triple, tokens[index])
        end
        assert_equal(expected.length, tokens.length)
      end
    end
  end
end