# polardbxengine/mysql-test/suite/innodb_fts/include/mecab.inc
#
# 252 lines
# 8.5 KiB
# SQL
# Raw Blame History
#
# This file contains ambiguous Unicode characters!
#
# This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#
#-----------------------------------------------------------------------------
# wl6607 : InnoDB FULLTEXT SEARCH: CJK support (mecab parser)
# Adding FTS check with mecab parser
# - Table with mecab parser
# - Basic FTS query ( mainly BOOLEAN MODE check)
# - FTS index with single and two columns
# - external parser index using alter table
#
#------------------------------------------------------------------------------
# Server-log messages that can show up while this include probes for a usable
# mecab setup (missing option, charset mismatch, failed plugin init); they are
# registered with mtr.add_suppression so they are treated as expected.
call mtr.add_suppression("unknown variable 'loose_mecab_rc_file");
call mtr.add_suppression("Fulltext index charset 'big5' doesn't match mecab charset");
call mtr.add_suppression("Mecab: createModel.* failed: param.cpp");
call mtr.add_suppression("Plugin 'mecab' init function returned error");
# NOTE(review): the expected-error declaration below is followed by `let`
# assignments rather than by a statement that installs anything - confirm it
# is intentional (the same declaration is repeated right before INSTALL PLUGIN).
-- disable_query_log
-- error 0, ER_CANT_INITIALIZE_UDF
# Restart with package dictionary
# $MYSQL_DATADIR is a mysqltest variable. The names assigned without a leading
# '$' (MYSQL_BASEDIR, MECABRC, DICDIR) are the ones the perl block below reads
# back through %ENV.
# $ipadic_charset is not assigned in this file - presumably set by the test
# that sources this include; verify against the caller.
let $MYSQL_DATADIR=`select @@datadir`;
let MYSQL_BASEDIR=`select @@basedir`;
let MECABRC=$MYSQL_DATADIR/mecabrc;
let DICDIR=ipadic_$ipadic_charset;
# In standard layout libdir is
# $basedir/lib
# while RPMS (el and sles) use
# $basedir/lib/mysql
# $basedir/lib64/mysql
#
# Fedora, Debian and Ubuntu use system mecab
#
# Fedora has
# UTF-8 in %{_libdir}/mecab/dic/ipadic
# EUCJP in %{_libdir}/mecab/dic/ipadic-EUCJP
#
# Debian and Ubuntu have
# UTF-8 in /var/lib/mecab/dic/ipadic-utf8
# EUCJP in /var/lib/mecab/dic/ipadic
#
--perl
use strict;
use lib "lib/";
use My::Find;

# Locate an IPADIC dictionary directory and record it as "dicdir" in the rc
# file named by the MECABRC environment variable.
#
# Inputs (environment): DICDIR (dictionary directory name), MYSQL_BASEDIR
# (server base directory), MECABRC (path of the rc file to write).
my $wanted_dic  = "$ENV{'DICDIR'}";
my $server_root = $ENV{'MYSQL_BASEDIR'};

# First choice: a dictionary shipped under the server base directory.
# The trailing 1 passed to my_find_dir is presumably My::Find's "optional"
# flag; the code below relies on an empty result when nothing is found.
my @server_lib_dirs = (
  'lib/mecab/dic',          # Bundled mecab
  'lib/mysql/mecab/dic',    # 32 bits RPMS
  'lib64/mysql/mecab/dic',  # 64 bits RPMS
);
my $dicdir = my_find_dir("$server_root", \@server_lib_dirs, ["$wanted_dic"], 1);

# Second choice: system-wide dictionaries, tried in order until one is found.
# Each entry is [ base directory, sub-paths, dictionary directory names ].
my @system_fallbacks = ();
if (index($wanted_dic, 'utf-8') != -1) {
  @system_fallbacks = (
    # Fedora: system mecab, 32 and 64 bits
    [ "/usr", ['lib/mecab/dic', 'lib64/mecab/dic'], ["ipadic"] ],
    # Debian/Ubuntu: system mecab
    [ "/var", ['lib/mecab/dic'], ["ipadic-utf8"] ],
  );
}
elsif (index($wanted_dic, 'euc-jp') != -1) {
  @system_fallbacks = (
    # Fedora: system mecab, 32 and 64 bits
    [ "/usr", ['lib/mecab/dic/', 'lib64/mecab/dic/'], ["ipadic-EUCJP"] ],
    # Debian/Ubuntu: system mecab
    [ "/var", ['lib/mecab/dic'], ["ipadic"] ],
  );
}

foreach my $fallback (@system_fallbacks) {
  # Stop as soon as any earlier lookup produced a directory.
  last if $dicdir ne '';
  my ($root, $sub_paths, $dic_names) = @{$fallback};
  $dicdir = my_find_dir($root, $sub_paths, $dic_names, 1);
}

# Write the rc file; the dicdir value is left empty when no dictionary was found.
unless (open(my $rc, '>', "$ENV{'MECABRC'}")) {
  die "Could not open $ENV{'MECABRC'}: $!";
}
else {
  print $rc "dicdir = ${dicdir}\n";
  close($rc);
}
EOF
# Restart the server with the mecab rc file option, any extra options from
# $MECAB_OPT, and innodb_ft_min_token_size=2, then wait for the reconnect.
# NOTE(review): the rc-file path is referenced as $mecabrc here, while the
# path is assigned earlier under the name MECABRC - confirm the variable
# lookup is case-insensitive, otherwise the option value would be empty.
-- source include/shutdown_mysqld.inc
-- exec echo "restart: --loose_mecab_rc_file=$mecabrc $MECAB_OPT --innodb_ft_min_token_size=2" >$MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-- enable_reconnect
-- source include/wait_until_connected_again.inc
-- disable_reconnect
# Installing the plugin is allowed to either succeed or fail with
# ER_CANT_INITIALIZE_UDF; the checks on mecab_charset below decide whether
# the test can continue.
-- disable_query_log
-- error 0, ER_CANT_INITIALIZE_UDF
eval INSTALL PLUGIN mecab SONAME '$MECAB';
let $mecab_charset=`SELECT variable_value FROM performance_schema.global_status WHERE VARIABLE_NAME='mecab_charset'`;
# No mecab_charset status value: treat the plugin as not loaded, remove the
# generated rc file and skip the test.
if ($mecab_charset == '') {
--remove_file $MYSQL_DATADIR/mecabrc
-- skip Test fail to load mecab parser, IPADIC files for $ipadic_charset might be missing.
}
# Plugin loaded, but with a different charset than the test expects:
# uninstall it, remove the rc file and skip.
# $mysql_charset is not assigned in this file - presumably set by the test
# that sources this include; verify against the caller.
if ($mecab_charset != $mysql_charset) {
UNINSTALL PLUGIN mecab;
--remove_file $MYSQL_DATADIR/mecabrc
-- skip Test mecab charset mismatch (MeCab: $mecab_charset vs MySQL: $mysql_charset). Correct IPADIC files might be missing.
}
--enable_query_log
# Basic checks: a table whose FULLTEXT index uses the mecab parser, queried in
# natural-language, boolean and query-expansion modes.
# From here on the query log is enabled, so every statement and its output is
# recorded; keep statement text unchanged when editing.
SHOW STATUS LIKE 'mecab_charset';
SET NAMES utf8;
# Single-column FULLTEXT index; the indexed column uses the charset reported
# by the plugin ($mecab_charset).
eval CREATE TABLE page (
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
page_title varchar(255) CHARACTER SET $mecab_charset NOT NULL DEFAULT '',
PRIMARY KEY (page_id),
FULLTEXT KEY page_title (page_title) WITH PARSER mecab
) ENGINE=InnoDB;
INSERT INTO page VALUES
(1,'レモナ'),
(2,'SDレモナ'),
(3,'レモナ'),
(4,'データベース管理システム'),
(5,'キム'),
(6,'鉄道100電車'),
(7,'100'),
(8,'ラフィタ・カスティージョ'),
(9,'ツル科_(Sibley)'),
(10,'ツル'),
(11,'鉄道');
# Select test/page as the table reported by the INNODB_FT_* information
# schema tables, then dump the tokens recorded for it.
SET GLOBAL innodb_ft_aux_table="test/page";
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
SHOW CREATE TABLE page;
# Boolean-mode searches: plain terms, a numeric term and a trailing '*'.
SELECT * FROM page WHERE MATCH(page_title) AGAINST("レモナ" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("キムショウカン" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("100" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ツル*" IN BOOLEAN MODE);
# The same multi-word title searched in natural-language mode and in boolean
# mode, then combined with the '+', '-', '<', '>' operators and parentheses.
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ラフィタ・カスティージョ");
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ラフィタ・カスティージョ" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("+ラフィタ・カスティージョ +データベース" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("+ラフィタ・カスティージョ +(データベース)" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("-ラフィタ・カスティージョ +(データベース)" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("ラフィタ・カスティージョ 鉄道" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST("<ラフィタ・カスティージョ >鉄道" IN BOOLEAN MODE);
# Natural-language search, without and with query expansion.
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道');
SELECT * FROM page WHERE MATCH(page_title) AGAINST('鉄道' WITH QUERY EXPANSION);
# Add a second column and a two-column mecab FULLTEXT index through
# ALTER TABLE, then search across both columns.
eval ALTER TABLE page add column c1 varchar(100) CHARACTER SET $mecab_charset DEFAULT 'レモナ';
ALTER TABLE page ADD FULLTEXT INDEX idx1 (page_title,c1) WITH PARSER mecab;
SELECT * FROM page WHERE MATCH(page_title,c1) AGAINST('レモナ');
SELECT * FROM page WHERE MATCH(page_title,c1) AGAINST('-レモナ' IN BOOLEAN MODE);
DROP TABLE page;
# Test charset mismatch.
# The column is declared with charset big5; the mecab FULLTEXT index is added
# only after a row has been inserted.
CREATE TABLE page (
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
page_title varchar(255) CHARACTER SET big5 NOT NULL DEFAULT '',
PRIMARY KEY (page_id)
)ENGINE=InnoDB;
INSERT INTO page VALUES (1,'日本');
# We don't return charset mismatch error because of parallel fts build.
CREATE FULLTEXT INDEX ft_idx ON page(page_title) WITH PARSER mecab;
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
# With the index in place, both a further insert and a full-text search are
# expected to fail with ER_ERROR_ON_WRITE.
# NOTE(review): the string literals in the two statements below are empty in
# this copy of the file; non-ASCII text may have been lost here - verify
# against the recorded result file before relying on them.
--error ER_ERROR_ON_WRITE
INSERT INTO page VALUES (2,'');
--error ER_ERROR_ON_WRITE
SELECT * FROM page WHERE MATCH(page_title) AGAINST('');
DROP TABLE page;
# Test utf8mb4
# Runs only when the plugin reports charset utf8: the indexed column is
# declared as utf8mb4 and filled with 4-byte characters.
if ($mecab_charset == 'utf8')
{
SET NAMES utf8mb4;
CREATE TABLE page (
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
page_title varchar(255) CHARACTER SET utf8mb4 NOT NULL DEFAULT '',
PRIMARY KEY (page_id),
FULLTEXT KEY page_title (page_title) WITH PARSER mecab
) ENGINE=InnoDB;
# insert some emoji chars with 4 bytes.
INSERT INTO page VALUES
(1,'😃'),
(2,'😄'),
(3,'😊100');
# Dump the tokens recorded for test/page.
SET GLOBAL innodb_ft_aux_table="test/page";
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
SHOW CREATE TABLE page;
# Search for a term that was not inserted into this table, then for the
# inserted characters.
# NOTE(review): two of the AGAINST('') literals below are empty in this copy
# of the file; 4-byte characters may have been lost here - verify against the
# recorded result file.
SELECT * FROM page WHERE MATCH(page_title) AGAINST("レモナ" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST('');
SELECT * FROM page WHERE MATCH(page_title) AGAINST('😄');
SELECT * FROM page WHERE MATCH(page_title) AGAINST('' WITH QUERY EXPANSION);
DROP TABLE page;
}
# Test eucjpms & cp932
# Runs only when $mysql_win_charset is set; the indexed column is declared
# with that charset.
# $mysql_win_charset is not assigned in this file - presumably set by the test
# that sources this include; verify against the caller.
if ($mysql_win_charset)
{
eval CREATE TABLE page (
page_id int(8) unsigned NOT NULL AUTO_INCREMENT,
page_title varchar(255) CHARACTER SET $mysql_win_charset NOT NULL DEFAULT '',
PRIMARY KEY (page_id),
FULLTEXT KEY page_title (page_title) WITH PARSER mecab
) ENGINE=InnoDB;
# insert some eucjpms & cp932 chars.
# NOTE(review): the inserted values and the AGAINST('') literals in this
# section are empty (or reduced to '100') in this copy of the file, which
# contradicts the comment above; the charset-specific characters may have
# been lost - verify against the recorded result file.
INSERT INTO page VALUES
(1,''),
(2,''),
(3,'100');
# Dump the tokens recorded for test/page.
SET GLOBAL innodb_ft_aux_table="test/page";
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE;
SHOW CREATE TABLE page;
# Search for a term that was not inserted into this table, then for the
# inserted characters.
SELECT * FROM page WHERE MATCH(page_title) AGAINST("レモナ" IN BOOLEAN MODE);
SELECT * FROM page WHERE MATCH(page_title) AGAINST('');
SELECT * FROM page WHERE MATCH(page_title) AGAINST('');
SELECT * FROM page WHERE MATCH(page_title) AGAINST('' WITH QUERY EXPANSION);
DROP TABLE page;
}
# Cleanup: reset innodb_ft_aux_table to its default, uninstall the mecab
# plugin and remove the rc file that was generated in the data directory.
SET GLOBAL innodb_ft_aux_table=default;
UNINSTALL PLUGIN mecab;
--remove_file $MYSQL_DATADIR/mecabrc