1007 lines
32 KiB
Plaintext
1007 lines
32 KiB
Plaintext
#
|
|
# Tests of regular expression functions.
|
|
#
|
|
CREATE TABLE t1 (
|
|
subject char(10),
|
|
pattern char(10)
|
|
);
|
|
#
|
|
# REGEXP_INSTR
|
|
#
|
|
# REGEXP_INSTR(), two arguments.
|
|
SELECT regexp_instr( 'abc', 'a' );
|
|
regexp_instr( 'abc', 'a' )
|
|
1
|
|
SELECT regexp_instr( 'abc', 'b' );
|
|
regexp_instr( 'abc', 'b' )
|
|
2
|
|
SELECT regexp_instr( 'abc', 'c' );
|
|
regexp_instr( 'abc', 'c' )
|
|
3
|
|
SELECT regexp_instr( 'abc', 'd' );
|
|
regexp_instr( 'abc', 'd' )
|
|
0
|
|
SELECT regexp_instr( NULL, 'a' );
|
|
regexp_instr( NULL, 'a' )
|
|
NULL
|
|
SELECT regexp_instr( 'a', NULL );
|
|
regexp_instr( 'a', NULL )
|
|
NULL
|
|
SELECT regexp_instr( NULL, NULL );
|
|
regexp_instr( NULL, NULL )
|
|
NULL
|
|
# This tests that the correct character set is declared. If we don't
|
|
# declare it correctly, the UTF-16 sequence will be interpreted as a
|
|
# two-byte string consisting of the ASCII for '1' and NUL, and the
|
|
# result will be 3100.
|
|
SET NAMES binary;
|
|
SELECT hex( concat(regexp_instr( 'a', 'a' )) );
|
|
hex( concat(regexp_instr( 'a', 'a' )) )
|
|
31
|
|
SET NAMES DEFAULT;
|
|
# REGEXP_INSTR(), illegal types.
|
|
SELECT regexp_instr( 1, 'a' );
|
|
regexp_instr( 1, 'a' )
|
|
0
|
|
SELECT regexp_instr( 1.1, 'a' );
|
|
regexp_instr( 1.1, 'a' )
|
|
0
|
|
SELECT regexp_instr( 'a', 1 );
|
|
regexp_instr( 'a', 1 )
|
|
0
|
|
SELECT regexp_instr( 'a', 1.1 );
|
|
regexp_instr( 'a', 1.1 )
|
|
0
|
|
SELECT regexp_instr( subject, pattern ) FROM t1;
|
|
regexp_instr( subject, pattern )
|
|
SELECT regexp_instr( 'a', '[[:invalid_bracket_expression:]]' );
|
|
ERROR HY000: Illegal argument to a regular expression.
|
|
# REGEXP_INSTR(), three arguments.
|
|
# subject, pattern, position.
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 1 );
|
|
regexp_instr( 'abcabcabc', 'a+', 1 )
|
|
1
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 2 );
|
|
regexp_instr( 'abcabcabc', 'a+', 2 )
|
|
4
|
|
SELECT regexp_instr( 'abcabcabc', 'b+', 1 );
|
|
regexp_instr( 'abcabcabc', 'b+', 1 )
|
|
2
|
|
SELECT regexp_instr( 'abcabcabc', 'b+', 2 );
|
|
regexp_instr( 'abcabcabc', 'b+', 2 )
|
|
2
|
|
SELECT regexp_instr( 'abcabcabc', 'b+', 3 );
|
|
regexp_instr( 'abcabcabc', 'b+', 3 )
|
|
5
|
|
SELECT regexp_instr( 'a', 'a+', 3 );
|
|
ERROR HY000: Index out of bounds in regular expression search.
|
|
# REGEXP_INSTR(), four arguments.
|
|
# subject, pattern, position, occurence.
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 1, 2 );
|
|
regexp_instr( 'abcabcabc', 'a+', 1, 2 )
|
|
4
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 1, 3 );
|
|
regexp_instr( 'abcabcabc', 'a+', 1, 3 )
|
|
7
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 1, 4 );
|
|
regexp_instr( 'abcabcabc', 'a+', 1, 4 )
|
|
0
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 4, 2 );
|
|
regexp_instr( 'abcabcabc', 'a+', 4, 2 )
|
|
7
|
|
# REGEXP_INSTR(), five arguments.
|
|
# subject, pattern, position, occurence, return_option
|
|
SELECT regexp_instr( 'a', 'a+', 1, 1, 2 );
|
|
ERROR HY000: Incorrect arguments to regexp_instr: return_option must be 1 or 0.
|
|
SELECT regexp_instr( 'a', 'a+', 1, 1, -1 );
|
|
ERROR HY000: Incorrect arguments to regexp_instr: return_option must be 1 or 0.
|
|
SELECT regexp_instr( 'a', 'a+', 1, 1, NULL );
|
|
regexp_instr( 'a', 'a+', 1, 1, NULL )
|
|
NULL
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 1, 1, 0 );
|
|
regexp_instr( 'abcabcabc', 'a+', 1, 1, 0 )
|
|
1
|
|
SELECT regexp_instr( 'abcabcabc', 'a+', 1, 1, 1 );
|
|
regexp_instr( 'abcabcabc', 'a+', 1, 1, 1 )
|
|
2
|
|
SELECT regexp_instr( 'aaabcabcabc', 'a+', 1, 1, 1 );
|
|
regexp_instr( 'aaabcabcabc', 'a+', 1, 1, 1 )
|
|
4
|
|
# REGEXP_INSTR(), six arguments.
|
|
# subject, pattern, position, occurence, return_option, match_parameter
|
|
SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'c' );
|
|
regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'c' )
|
|
0
|
|
SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'i' );
|
|
regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'i' )
|
|
4
|
|
SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'ci' );
|
|
regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'ci' )
|
|
4
|
|
SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'cic' );
|
|
regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'cic' )
|
|
0
|
|
SELECT regexp_instr( 'a', 'a+', 1, 1, 1, 'x' );
|
|
ERROR HY000: Incorrect arguments to regexp_instr
|
|
SELECT regexp_instr( 'a', 'a+', 1, 1, 1, NULL );
|
|
regexp_instr( 'a', 'a+', 1, 1, 1, NULL )
|
|
NULL
|
|
#
|
|
# REGEXP_LIKE
|
|
#
|
|
# REGEXP_LIKE(), two arguments.
|
|
SELECT regexp_like( 'abc', 'a' );
|
|
regexp_like( 'abc', 'a' )
|
|
1
|
|
SELECT regexp_like( 'abc', 'b' );
|
|
regexp_like( 'abc', 'b' )
|
|
1
|
|
SELECT regexp_like( 'abc', 'c' );
|
|
regexp_like( 'abc', 'c' )
|
|
1
|
|
SELECT regexp_like( 'abc', 'd' );
|
|
regexp_like( 'abc', 'd' )
|
|
0
|
|
SELECT regexp_like( 'a', 'a.*' );
|
|
regexp_like( 'a', 'a.*' )
|
|
1
|
|
SELECT regexp_like( 'ab', 'a.*' );
|
|
regexp_like( 'ab', 'a.*' )
|
|
1
|
|
SELECT regexp_like( NULL, 'a' );
|
|
regexp_like( NULL, 'a' )
|
|
NULL
|
|
SELECT regexp_like( 'a', NULL );
|
|
regexp_like( 'a', NULL )
|
|
NULL
|
|
SELECT regexp_like( NULL, NULL );
|
|
regexp_like( NULL, NULL )
|
|
NULL
|
|
# This tests that the correct character set is declared. If we don't
|
|
# declare it correctly, the UTF-16 sequence will be interpreted as a
|
|
# two-byte string consisting of the ASCII for '1' and NUL, and the
|
|
# result will be 3100.
|
|
SET NAMES binary;
|
|
SELECT hex( concat(regexp_like( 'a', 'a' )) );
|
|
hex( concat(regexp_like( 'a', 'a' )) )
|
|
31
|
|
SET NAMES DEFAULT;
|
|
# REGEXP_LIKE(), three arguments.
|
|
SELECT regexp_like( 'abc', 'A', 'i' );
|
|
regexp_like( 'abc', 'A', 'i' )
|
|
1
|
|
SELECT regexp_like( 'abc', 'A', 'c' );
|
|
regexp_like( 'abc', 'A', 'c' )
|
|
0
|
|
SELECT regexp_like( 'a', 'a+', 'x' );
|
|
ERROR HY000: Incorrect arguments to regexp_like
|
|
SELECT regexp_like( 'a', 'a+', 'cmnux' );
|
|
ERROR HY000: Incorrect arguments to regexp_like
|
|
SELECT regexp_like( 'a', 'a+', NULL );
|
|
regexp_like( 'a', 'a+', NULL )
|
|
NULL
|
|
# REGEXP_LIKE(), illegal types.
|
|
SELECT regexp_like( 1, 'a' );
|
|
regexp_like( 1, 'a' )
|
|
0
|
|
SELECT regexp_like( 1.1, 'a' );
|
|
regexp_like( 1.1, 'a' )
|
|
0
|
|
SELECT regexp_like( 'a', 1 );
|
|
regexp_like( 'a', 1 )
|
|
0
|
|
SELECT regexp_like( 'a', 1.1 );
|
|
regexp_like( 'a', 1.1 )
|
|
0
|
|
SELECT regexp_like('a', '[[:invalid_bracket_expression:]]');
|
|
ERROR HY000: Illegal argument to a regular expression.
|
|
#
|
|
# REGEXP_REPLACE
|
|
#
|
|
# REGEXP_REPLACE(), three arguments.
|
|
SELECT regexp_replace( 'aaa', 'a', 'X' );
|
|
regexp_replace( 'aaa', 'a', 'X' )
|
|
XXX
|
|
SELECT regexp_replace( 'abc', 'b', 'X' );
|
|
regexp_replace( 'abc', 'b', 'X' )
|
|
aXc
|
|
SELECT regexp_replace( NULL, 'a', 'X' );
|
|
regexp_replace( NULL, 'a', 'X' )
|
|
NULL
|
|
SELECT regexp_replace( 'aaa', NULL, 'X' );
|
|
regexp_replace( 'aaa', NULL, 'X' )
|
|
NULL
|
|
SELECT regexp_replace( 'aaa', 'a', NULL );
|
|
regexp_replace( 'aaa', 'a', NULL )
|
|
NULL
|
|
# This tests that the correct character set is declared. If we don't
|
|
# declare it correctly, the UTF-16 sequence will be interpreted as a
|
|
# zero-terminated string consisting of 'X', and the
|
|
# result will thus be 'X'.
|
|
SELECT concat( regexp_replace( 'aaa', 'a', 'X' ), 'x' );
|
|
concat( regexp_replace( 'aaa', 'a', 'X' ), 'x' )
|
|
XXXx
|
|
# REGEXP_REPLACE(), four arguments.
|
|
SELECT regexp_replace( 'aaa', 'a', 'X', 0 );
|
|
ERROR 42000: Incorrect parameters in the call to native function 'regexp_replace'
|
|
SELECT regexp_replace( 'aaa', 'a', 'X', 1 );
|
|
regexp_replace( 'aaa', 'a', 'X', 1 )
|
|
XXX
|
|
SELECT regexp_replace( 'a', 'a+', 'b', 3 );
|
|
ERROR HY000: Index out of bounds in regular expression search.
|
|
# REGEXP_REPLACE(), five arguments.
|
|
SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 0, 1 );
|
|
ERROR 42000: Incorrect parameters in the call to native function 'regexp_replace'
|
|
SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 1 );
|
|
regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 1 )
|
|
aaaXccbbddaa
|
|
SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 2 );
|
|
regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 2 )
|
|
aaabbccXddaa
|
|
SELECT regexp_replace( 'aaabbccbbddaa', '(b+)', '<$1>', 1, 2 );
|
|
regexp_replace( 'aaabbccbbddaa', '(b+)', '<$1>', 1, 2 )
|
|
aaabbcc<bb>ddaa
|
|
SELECT regexp_replace( 'aaabbccbbddaa', 'x+', 'x', 1, 0 );
|
|
regexp_replace( 'aaabbccbbddaa', 'x+', 'x', 1, 0 )
|
|
aaabbccbbddaa
|
|
SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'x', 1, 0 );
|
|
regexp_replace( 'aaabbccbbddaa', 'b+', 'x', 1, 0 )
|
|
aaaxccxddaa
|
|
SELECT regexp_replace( 'aaab', 'b', 'x', 1, 2 );
|
|
regexp_replace( 'aaab', 'b', 'x', 1, 2 )
|
|
aaab
|
|
SELECT regexp_replace( 'aaabccc', 'b', 'x', 1, 2 );
|
|
regexp_replace( 'aaabccc', 'b', 'x', 1, 2 )
|
|
aaabccc
|
|
SELECT regexp_replace( 'abc', 'b', 'X' );
|
|
regexp_replace( 'abc', 'b', 'X' )
|
|
aXc
|
|
SELECT regexp_replace( 'abcbdb', 'b', 'X' );
|
|
regexp_replace( 'abcbdb', 'b', 'X' )
|
|
aXcXdX
|
|
SELECT regexp_replace( 'abcbdb', 'b', 'X', 3 );
|
|
regexp_replace( 'abcbdb', 'b', 'X', 3 )
|
|
abcXdX
|
|
SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 1 );
|
|
regexp_replace( 'aaabcbdb', 'b', 'X', 1 )
|
|
aaaXcXdX
|
|
SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 2 );
|
|
regexp_replace( 'aaabcbdb', 'b', 'X', 2 )
|
|
aaaXcXdX
|
|
SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 3 );
|
|
regexp_replace( 'aaabcbdb', 'b', 'X', 3 )
|
|
aaaXcXdX
|
|
SELECT regexp_replace( 'a', '[[:invalid_bracket_expression:]]', '$1' );
|
|
ERROR HY000: Illegal argument to a regular expression.
|
|
#
|
|
# Test of the dynamic buffer in REGEXP_REPLACE.
|
|
#
|
|
SELECT regexp_replace( 'aaa', 'a', 'X', 2 );
|
|
regexp_replace( 'aaa', 'a', 'X', 2 )
|
|
aXX
|
|
SELECT regexp_replace( 'aaa', 'a', 'XX', 2 );
|
|
regexp_replace( 'aaa', 'a', 'XX', 2 )
|
|
aXXXX
|
|
#
|
|
# REGEXP_SUBSTR
|
|
#
|
|
SELECT regexp_substr( 'a' );
|
|
ERROR 42000: Incorrect parameter count in the call to native function 'regexp_substr'
|
|
SELECT regexp_substr( 'a', 'b', 'c', 'd', 'e', 'f' );
|
|
ERROR 42000: Incorrect parameter count in the call to native function 'regexp_substr'
|
|
# REGEXP_SUBSTR(), two arguments.
|
|
SELECT regexp_substr( 'ab ac ad', '.d' );
|
|
regexp_substr( 'ab ac ad', '.d' )
|
|
ad
|
|
SELECT regexp_substr( 'ab ac ad', '.D' );
|
|
regexp_substr( 'ab ac ad', '.D' )
|
|
ad
|
|
# This tests that the correct character set is declared. If we don't
|
|
# declare it correctly, the UTF-16 sequence will be interpreted as a
|
|
# zero-terminated string consisting of 'a', and the
|
|
# result will thus be 'a'.
|
|
SELECT concat( regexp_substr( 'aaa', 'a+' ), 'x' );
|
|
concat( regexp_substr( 'aaa', 'a+' ), 'x' )
|
|
aaax
|
|
# REGEXP_SUBSTR(), three arguments.
|
|
SELECT regexp_substr( 'ab ac ad', 'a.', 0 );
|
|
ERROR 42000: Incorrect parameters in the call to native function 'regexp_substr'
|
|
SELECT regexp_substr( 'ab ac ad', 'A.', 3 );
|
|
regexp_substr( 'ab ac ad', 'A.', 3 )
|
|
ac
|
|
SELECT regexp_substr( 'ab ac ad', 'A.', 3, 1 );
|
|
regexp_substr( 'ab ac ad', 'A.', 3, 1 )
|
|
ac
|
|
SELECT regexp_substr( 'ab ac ad', 'A.', 3, 2 );
|
|
regexp_substr( 'ab ac ad', 'A.', 3, 2 )
|
|
ad
|
|
SELECT regexp_substr( 'ab ac ad', 'A.', 3, 3 );
|
|
regexp_substr( 'ab ac ad', 'A.', 3, 3 )
|
|
NULL
|
|
SELECT regexp_substr( 'ab ac ad', 'A.', 3, 3 ) IS NULL;
|
|
regexp_substr( 'ab ac ad', 'A.', 3, 3 ) IS NULL
|
|
0
|
|
# REGEXP_SUBSTR(), four arguments.
|
|
SELECT regexp_substr( 'ab ac ad', 'A.', 1, 1, 'c' );
|
|
regexp_substr( 'ab ac ad', 'A.', 1, 1, 'c' )
|
|
NULL
|
|
SELECT regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'i' );
|
|
regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'i' )
|
|
ab
|
|
SELECT regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'im' );
|
|
regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'im' )
|
|
ab
|
|
SELECT regexp_substr('a', '[[:invalid_bracket_expression:]]');
|
|
ERROR HY000: Illegal argument to a regular expression.
|
|
SET sql_mode = '';
|
|
CREATE TABLE t2 ( g GEOMETRY NOT NULL );
|
|
INSERT INTO t2 VALUES ( POINT(1,2) );
|
|
SELECT concat( regexp_like(g, g), 'x' ) FROM t2;
|
|
concat( regexp_like(g, g), 'x' )
|
|
0x
|
|
SET sql_mode = DEFAULT;
|
|
DROP TABLE t2;
|
|
#
|
|
DROP TABLE t1;
|
|
#
|
|
# Error handling.
|
|
#
|
|
# Error handling: Stack limit.
|
|
# The following queries are from bug#24449090
|
|
SELECT regexp_instr( '', '((((((((){80}){}){11}){11}){11}){80}){11}){4}' );
|
|
ERROR HY000: Incorrect description of a {min,max} interval.
|
|
# Query and result logs turned off from here ...
|
|
# ... to here. The reason is that we're testing the stack limit feature,
|
|
# which is not present (or working) in ICU 5.0 or earlier. We don't need
|
|
# the query or result/error log to test the feature, though. The above
|
|
# tests still fail unless the proper error is thrown.
|
|
#
|
|
# Character set conversion.
|
|
#
|
|
SET NAMES latin1;
|
|
SELECT regexp_instr( _latin1 x'61F662', _latin1 x'F6' );
|
|
regexp_instr( _latin1 x'61F662', _latin1 x'F6' )
|
|
2
|
|
SELECT regexp_instr( _latin1 x'61F662', _utf8mb4'ö' );
|
|
regexp_instr( _latin1 x'61F662', _utf8mb4'ö' )
|
|
2
|
|
SELECT regexp_instr( concat('a', _utf8mb4 x'F09F8DA3'), _utf8mb4 x'F09F8DA3' );
|
|
regexp_instr( concat('a', _utf8mb4 x'F09F8DA3'), _utf8mb4 x'F09F8DA3' )
|
|
2
|
|
SELECT regexp_instr( _utf8mb4'aöb', _utf8mb4'ö' );
|
|
regexp_instr( _utf8mb4'aöb', _utf8mb4'ö' )
|
|
2
|
|
SET NAMES utf8;
|
|
Warnings:
|
|
Warning 3719 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous.
|
|
SELECT regexp_instr( 'aöb', 'ö' );
|
|
regexp_instr( 'aöb', 'ö' )
|
|
2
|
|
SET NAMES DEFAULT;
|
|
#
|
|
# Bug#30241: Regular expression problems
|
|
#
|
|
SET NAMES utf8;
|
|
Warnings:
|
|
Warning 3719 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous.
|
|
SELECT regexp_instr( 'אב רק', /*k*/'^[^ב]' );
|
|
regexp_instr( 'אב רק', /*k*/'^[^ב]' )
|
|
1
|
|
PREPARE stmt1 FROM "select 'a' rlike ?";
|
|
DEALLOCATE PREPARE stmt1;
|
|
CREATE TABLE t1( a INT, subject CHAR(10) );
|
|
CREATE TABLE t2( pattern CHAR(10) );
|
|
insert into t1 values (0, 'apa');
|
|
insert into t2 values ('apa');
|
|
CREATE DEFINER=root@localhost PROCEDURE p1()
|
|
BEGIN
|
|
UPDATE t1, t2
|
|
SET a = 1
|
|
WHERE regexp_like(t1.subject, t2.pattern);
|
|
END||
|
|
CALL p1();
|
|
DROP PROCEDURE p1;
|
|
DROP TABLE t1, t2;
|
|
CREATE TABLE t1 ( a INT );
|
|
EXPLAIN SELECT 1 FROM t1 WHERE 1 REGEXP (1 IN (SELECT 1 FROM t1));
|
|
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
|
|
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE
|
|
2 SUBQUERY t1 NULL ALL NULL NULL NULL NULL 1 100.00 NULL
|
|
Warnings:
|
|
Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t1` where false
|
|
DROP TABLE t1;
|
|
PREPARE stmt1 FROM "SELECT regexp_like( 'a', ? )";
|
|
PREPARE stmt2 FROM "SELECT regexp_like( ?, 'a' )";
|
|
PREPARE stmt3 FROM "SELECT regexp_like( ?, ? )";
|
|
SET @subject = 'a';
|
|
SET @pattern = 'a+';
|
|
EXECUTE stmt1 USING @pattern;
|
|
regexp_like( 'a', ? )
|
|
1
|
|
EXECUTE stmt2 USING @subject;
|
|
regexp_like( ?, 'a' )
|
|
1
|
|
EXECUTE stmt3 USING @subject, @pattern;
|
|
regexp_like( ?, ? )
|
|
1
|
|
#
|
|
# Yes, you can circumvent the type checking using prepared statements.
|
|
#
|
|
SET @subject = 1;
|
|
SET @pattern = 1;
|
|
EXECUTE stmt1 USING @pattern;
|
|
regexp_like( 'a', ? )
|
|
0
|
|
EXECUTE stmt2 USING @subject;
|
|
regexp_like( ?, 'a' )
|
|
0
|
|
EXECUTE stmt3 USING @subject, @pattern;
|
|
regexp_like( ?, ? )
|
|
1
|
|
DEALLOCATE PREPARE stmt1;
|
|
DEALLOCATE PREPARE stmt2;
|
|
DEALLOCATE PREPARE stmt3;
|
|
# Let's make sure we handle arguments that raise errors when evaluated.
|
|
DO 1 rlike multilinestring(point(1, 1));
|
|
ERROR HY000: Incorrect arguments to multilinestring
|
|
#
|
|
# Test of repeated use of one matcher.
|
|
#
|
|
CREATE TABLE t1 ( a CHAR(10) );
|
|
INSERT INTO t1 VALUES ( 'abc' ), ( 'bcd' ), ( 'cde' );
|
|
SELECT regexp_like( a, 'a' ) FROM t1;
|
|
regexp_like( a, 'a' )
|
|
1
|
|
0
|
|
0
|
|
DROP TABLE t1;
|
|
CREATE TABLE t1 ( a CHAR ( 10 ), b CHAR ( 10 ) );
|
|
INSERT INTO t1 VALUES( NULL, 'abc' );
|
|
INSERT INTO t1 VALUES( 'def', NULL );
|
|
SELECT a, b, regexp_like( a, b ) FROM t1;
|
|
a b regexp_like( a, b )
|
|
NULL abc NULL
|
|
def NULL NULL
|
|
DROP TABLE t1;
|
|
#
|
|
# Generated columns.
|
|
#
|
|
CREATE TABLE t1 (
|
|
c CHAR(10) CHARSET latin1 COLLATE latin1_bin,
|
|
c_ci CHAR(10) CHARSET latin1 COLLATE latin1_general_ci,
|
|
c_cs CHAR(10) CHARSET latin1 COLLATE latin1_general_cs
|
|
);
|
|
INSERT INTO t1
|
|
VALUES ( 'a', 'a', 'a' ), ( 'A', 'A', 'A' ), ( 'b', 'b', 'b' );
|
|
SELECT c, c_ci REGEXP 'A', c_cs REGEXP 'A' FROM t1;
|
|
c c_ci REGEXP 'A' c_cs REGEXP 'A'
|
|
A 1 1
|
|
a 1 0
|
|
b 0 0
|
|
DROP TABLE t1;
|
|
SELECT regexp_like( _utf8mb4 'ss' COLLATE utf8mb4_german2_ci,
|
|
_utf8mb4 'ß' COLLATE utf8mb4_german2_ci );
|
|
regexp_like( _utf8mb4 'ss' COLLATE utf8mb4_german2_ci,
|
|
_utf8mb4 'ß' COLLATE utf8mb4_german2_ci )
|
|
1
|
|
SELECT regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_german2_ci,
|
|
_utf8mb4 'ss' );
|
|
regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_german2_ci,
|
|
_utf8mb4 'ss' )
|
|
1
|
|
SELECT regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_de_pb_0900_as_cs,
|
|
_utf8mb4 'ss' );
|
|
regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_de_pb_0900_as_cs,
|
|
_utf8mb4 'ss' )
|
|
0
|
|
#
|
|
# Regression testing.
|
|
#
|
|
SET NAMES latin1;
|
|
SELECT regexp_like( 'a', 'A' COLLATE latin1_general_ci );
|
|
regexp_like( 'a', 'A' COLLATE latin1_general_ci )
|
|
1
|
|
SELECT 'a' REGEXP 'A' COLLATE latin1_general_ci;
|
|
'a' REGEXP 'A' COLLATE latin1_general_ci
|
|
1
|
|
SELECT regexp_like( 'a', 'A' COLLATE latin1_general_cs );
|
|
regexp_like( 'a', 'A' COLLATE latin1_general_cs )
|
|
0
|
|
SELECT 'a' REGEXP 'A' COLLATE latin1_general_cs;
|
|
'a' REGEXP 'A' COLLATE latin1_general_cs
|
|
0
|
|
# The default behaviour, multiline match is off:
|
|
SELECT regexp_like( 'a\nb\nc', '^b$' );
|
|
regexp_like( 'a\nb\nc', '^b$' )
|
|
0
|
|
# Enabling the multiline option using the PCRE option syntax:
|
|
# (Previously not accepted)
|
|
SELECT regexp_like( 'a\nb\nc', '(?m)^b$' );
|
|
regexp_like( 'a\nb\nc', '(?m)^b$' )
|
|
1
|
|
# Dot matches newline:
|
|
SELECT regexp_like( 'a\nb\nc', '.*' );
|
|
regexp_like( 'a\nb\nc', '.*' )
|
|
1
|
|
SELECT regexp_like( _utf16 'a' , 'a' );
|
|
regexp_like( _utf16 'a' , 'a' )
|
|
1
|
|
SELECT regexp_like( _utf16le 'a' , 'a' );
|
|
regexp_like( _utf16le 'a' , 'a' )
|
|
0
|
|
# Make sure we exit on error.
|
|
SELECT regexp_like( 'aaa', 'a+', 1 );
|
|
ERROR HY000: Incorrect arguments to regexp_like
|
|
SELECT regexp_substr( 'aaa', 'a+', 1, 1, 1 );
|
|
ERROR HY000: Incorrect arguments to regexp_substr
|
|
SELECT regexp_substr( 'aaa', 'a+', 1, 1, 1 );
|
|
ERROR HY000: Incorrect arguments to regexp_substr
|
|
SELECT regexp_substr( 'aaa', '+' );
|
|
ERROR HY000: Syntax error in regular expression on line 1, character 1.
|
|
#
|
|
# Test of a table where the columns are already in the lib's charset and
|
|
# contain NULL.
|
|
#
|
|
CREATE TABLE t1 (
|
|
a CHAR(3) CHARACTER SET utf16le,
|
|
b CHAR(3) CHARACTER SET utf16le
|
|
);
|
|
INSERT INTO t1 VALUES ( NULL, 'abc' );
|
|
INSERT INTO t1 VALUES ( 'def', NULL );
|
|
INSERT INTO t1 VALUES ( NULL, NULL );
|
|
SELECT a regexp b FROM t1;
|
|
a regexp b
|
|
NULL
|
|
NULL
|
|
NULL
|
|
DROP TABLE t1;
|
|
#
|
|
# Typecasts.
|
|
#
|
|
CREATE TABLE t1
|
|
(
|
|
a REAL,
|
|
b INT,
|
|
c CHAR(100),
|
|
d DECIMAL
|
|
);
|
|
INSERT INTO t1 VALUES ( regexp_instr('a', 'a'),
|
|
regexp_instr('a', 'a'),
|
|
regexp_instr('a', 'a'),
|
|
regexp_instr('a', 'a') );
|
|
SELECT * FROM t1;
|
|
a b c d
|
|
1 1 1 1
|
|
DELETE FROM t1;
|
|
INSERT INTO t1 VALUES ( regexp_like('a', 'a'),
|
|
regexp_like('a', 'a'),
|
|
regexp_like('a', 'a'),
|
|
regexp_like('a', 'a') );
|
|
SELECT * FROM t1;
|
|
a b c d
|
|
1 1 1 1
|
|
DELETE FROM t1;
|
|
INSERT INTO t1 ( a ) VALUES ( regexp_replace('a', 'a', 'a') );
|
|
ERROR 01000: Data truncated for column 'a' at row 1
|
|
INSERT INTO t1 ( b ) VALUES ( regexp_replace('a', 'a', 'a') );
|
|
ERROR HY000: Incorrect integer value: 'a' for column 'b' at row 1
|
|
INSERT INTO t1 ( c ) VALUES ( regexp_replace('a', 'a', 'a') );
|
|
INSERT INTO t1 ( d ) VALUES ( regexp_replace('a', 'a', 'a') );
|
|
ERROR HY000: Incorrect decimal value: 'a' for column 'd' at row 1
|
|
SELECT * FROM t1;
|
|
a b c d
|
|
NULL NULL a NULL
|
|
DELETE FROM t1;
|
|
INSERT INTO t1 ( a ) VALUES ( regexp_substr('a', 'a') );
|
|
ERROR 01000: Data truncated for column 'a' at row 1
|
|
INSERT INTO t1 ( b ) VALUES ( regexp_substr('a', 'a') );
|
|
ERROR HY000: Incorrect integer value: 'a' for column 'b' at row 1
|
|
INSERT INTO t1 ( c ) VALUES ( regexp_substr('a', 'a') );
|
|
INSERT INTO t1 ( d ) VALUES ( regexp_substr('a', 'a') );
|
|
ERROR HY000: Incorrect decimal value: 'a' for column 'd' at row 1
|
|
SELECT * FROM t1;
|
|
a b c d
|
|
NULL NULL a NULL
|
|
DROP TABLE t1;
|
|
# At the time of writing, val_int() is not called when inserting into an
|
|
# INT column.
|
|
SELECT cast( regexp_replace('a', 'a', 'a') AS SIGNED INTEGER );
|
|
cast( regexp_replace('a', 'a', 'a') AS SIGNED INTEGER )
|
|
0
|
|
Warnings:
|
|
Warning 1292 Truncated incorrect INTEGER value: 'a'
|
|
SELECT cast( regexp_substr ('a', 'a') AS SIGNED INTEGER );
|
|
cast( regexp_substr ('a', 'a') AS SIGNED INTEGER )
|
|
0
|
|
Warnings:
|
|
Warning 1292 Truncated incorrect INTEGER value: 'a'
|
|
# Casting to DATETIME/TIME
|
|
# Due to the class hierarchy of function objects, these have to be
|
|
# copy-pasted.
|
|
SELECT cast( regexp_instr ('a', 'a' ) AS DATETIME );
|
|
cast( regexp_instr ('a', 'a' ) AS DATETIME )
|
|
NULL
|
|
Warnings:
|
|
Warning 1292 Incorrect datetime value: '1'
|
|
SELECT cast( regexp_like ('a', 'a' ) AS DATETIME );
|
|
cast( regexp_like ('a', 'a' ) AS DATETIME )
|
|
NULL
|
|
Warnings:
|
|
Warning 1292 Incorrect datetime value: '1'
|
|
SELECT cast( regexp_replace('a', 'a', 'a') AS DATETIME );
|
|
cast( regexp_replace('a', 'a', 'a') AS DATETIME )
|
|
NULL
|
|
Warnings:
|
|
Warning 1292 Incorrect datetime value: 'a'
|
|
SELECT cast( regexp_substr ('a', 'a' ) AS DATETIME );
|
|
cast( regexp_substr ('a', 'a' ) AS DATETIME )
|
|
NULL
|
|
Warnings:
|
|
Warning 1292 Incorrect datetime value: 'a'
|
|
SELECT cast( regexp_instr ('a', 'a' ) AS TIME );
|
|
cast( regexp_instr ('a', 'a' ) AS TIME )
|
|
00:00:01
|
|
SELECT cast( regexp_like ('a', 'a' ) AS TIME );
|
|
cast( regexp_like ('a', 'a' ) AS TIME )
|
|
00:00:01
|
|
SELECT cast( regexp_replace('a', 'a', 'a') AS TIME );
|
|
cast( regexp_replace('a', 'a', 'a') AS TIME )
|
|
NULL
|
|
Warnings:
|
|
Warning 1292 Truncated incorrect time value: 'a'
|
|
SELECT cast( regexp_substr ('a', 'a' ) AS TIME );
|
|
cast( regexp_substr ('a', 'a' ) AS TIME )
|
|
NULL
|
|
Warnings:
|
|
Warning 1292 Truncated incorrect time value: 'a'
|
|
#
|
|
# Error messages.
|
|
# Test of the error messages themselves.
|
|
#
|
|
SET GLOBAL net_buffer_length = 1024;
|
|
SET GLOBAL max_allowed_packet = @@global.net_buffer_length;
|
|
SELECT @@global.max_allowed_packet;
|
|
@@global.max_allowed_packet
|
|
1024
|
|
# We need to start a new session in order for the changes to the session
|
|
# version of max_allowed_packet to take effect.
|
|
# This is now the replacement buffer size in UTF-16 characters.
|
|
SET @buf_sz_utf16 = @@global.max_allowed_packet / length( _utf16'x' );
|
|
SELECT @buf_sz_utf16;
|
|
@buf_sz_utf16
|
|
512.000000000
|
|
SELECT length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'b' ));
|
|
length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'b' ))
|
|
512
|
|
SELECT length(regexp_replace( repeat('a', @buf_sz_utf16 + 1), 'a', 'b' ));
|
|
ERROR HY000: The result string is larger than the result buffer.
|
|
SELECT length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'bb' ));
|
|
ERROR HY000: The result string is larger than the result buffer.
|
|
SET GLOBAL net_buffer_length = DEFAULT;
|
|
Warnings:
|
|
Warning 1708 The value of 'max_allowed_packet' should be no less than the value of 'net_buffer_length'
|
|
SET GLOBAL max_allowed_packet = DEFAULT;
|
|
SELECT regexp_like( 'a', '[[:<:]]a' );
|
|
ERROR HY000: Illegal argument to a regular expression.
|
|
SELECT regexp_like( 'a', ' **' );
|
|
ERROR HY000: Syntax error in regular expression on line 1, character 5.
|
|
SELECT regexp_like( 'a', ' \n **' );
|
|
ERROR HY000: Syntax error in regular expression on line 2, character 4.
|
|
SELECT regexp_like( 'a', ' +++' );
|
|
ERROR HY000: Syntax error in regular expression on line 1, character 5.
|
|
SELECT regexp_like( 'a', '\\' );
|
|
ERROR HY000: Unrecognized escape sequence in regular expression.
|
|
SELECT regexp_like('a','(?{})');
|
|
ERROR HY000: The regular expression contains a feature that is not implemented in this library version.
|
|
SELECT regexp_like('a','(');
|
|
ERROR HY000: Mismatched parenthesis in regular expression.
|
|
SELECT regexp_like('a','a{}');
|
|
ERROR HY000: Incorrect description of a {min,max} interval.
|
|
SELECT regexp_like('a','a{2,1}');
|
|
ERROR HY000: The maximum is less than the minumum in a {min,max} interval.
|
|
SELECT regexp_like('a','\\1');
|
|
ERROR HY000: Invalid back-reference in regular expression.
|
|
SELECT regexp_substr( 'ab','(?<=a+)b' );
|
|
ERROR HY000: The look-behind assertion exceeds the limit in regular expression.
|
|
SELECT regexp_like( 'a', 'a[' );
|
|
ERROR HY000: The regular expression contains an unclosed bracket expression.
|
|
SELECT regexp_substr( 'ab','[b-a]' );
|
|
ERROR HY000: The regular expression contains an [x-y] character range where x comes after y.
|
|
#
|
|
# Test that the replacement buffer can grow beyond the maximum VARCHAR
|
|
# column length.
|
|
#
|
|
CREATE TABLE t1 ( a TEXT );
|
|
INSERT INTO t1 VALUES ( repeat( 'a', 16384 ) );
|
|
SELECT char_length ( regexp_replace( a, 'a', 'b' ) ) FROM t1;
|
|
char_length ( regexp_replace( a, 'a', 'b' ) )
|
|
16384
|
|
SET GLOBAL regexp_time_limit = 10000;
|
|
SELECT regexp_like ( regexp_replace( a, 'a', 'b' ), 'b{16384}' ) FROM t1;
|
|
regexp_like ( regexp_replace( a, 'a', 'b' ), 'b{16384}' )
|
|
1
|
|
SET GLOBAL regexp_time_limit = DEFAULT;
|
|
DROP TABLE t1;
|
|
#
|
|
# Bug#27134570: DOS: REGEXP TAKES EXPONENTIALLY LONGER, CAN'T BE KILLED,
|
|
# HOGS CPU
|
|
#
|
|
DO '1' regexp repeat('$', 50000000);
|
|
ERROR HY000: The regular expression pattern exceeds limits on size or complexity.
|
|
#
|
|
# Bug#27140286: REGEXP, ASSERTION FAILED: !THD->IS_ERROR() IN
|
|
# SETUP_FIELDS()
|
|
#
|
|
DO ( (@b) regexp (cot (unhex ( 1 )) ) );
|
|
ERROR 22003: DOUBLE value is out of range in 'cot(unhex(1))'
|
|
DO ( (@c) rlike (cot ( (!( @f )) )) );
|
|
Warnings:
|
|
Warning 1287 '!' is deprecated and will be removed in a future release. Please use NOT instead
|
|
DO ( ('') rlike (cot ( ' %' )) );
|
|
ERROR 22003: DOUBLE value is out of range in 'cot(' %')'
|
|
DO ( (-28277) regexp (period_add ( -10966, 1381205734 )) );
|
|
ERROR HY000: Incorrect arguments to period_add
|
|
DO ( (( @f )) rlike (json_depth ( 'key4' )) );
|
|
ERROR 22032: Invalid JSON text in argument 1 to function json_depth: "Invalid value." at position 0.
|
|
DO ( ('- ') regexp (cot ( right (':#.', 33) )) );
|
|
ERROR 22003: DOUBLE value is out of range in 'cot(right(':#.',33))'
|
|
DO ( (1) regexp (exp ( 64826 )) );
|
|
ERROR 22003: DOUBLE value is out of range in 'exp(64826)'
|
|
DO ( (@g) regexp (cot ( @f )) );
|
|
DO ( (@b) regexp (exp ( 0x1fc5574c )) );
|
|
ERROR 22003: DOUBLE value is out of range in 'exp(0x1fc5574c)'
|
|
DO ( (25091) rlike (exp ( 14373 )) );
|
|
ERROR 22003: DOUBLE value is out of range in 'exp(14373)'
|
|
#
|
|
# Bug#27183583: REGEXP OPERATIONS CANT BE KILLED
|
|
#
|
|
SET GLOBAL regexp_time_limit = 1000000;
|
|
connect conn1, localhost, root;
|
|
SELECT regexp_instr('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC', '(A+)+B');
|
|
connection default;
|
|
KILL QUERY <conn1_id>;
|
|
connection conn1;
|
|
ERROR 70100: Query execution was interrupted
|
|
disconnect conn1;
|
|
connection default;
|
|
SET GLOBAL regexp_time_limit = DEFAULT;
|
|
#
|
|
# Bug#27252630: REGEXP FUNCTIONS IGNORE NULLS
|
|
#
|
|
SELECT regexp_instr ( 'a', 'a', NULL );
|
|
regexp_instr ( 'a', 'a', NULL )
|
|
NULL
|
|
SELECT regexp_instr ( 'a', 'a', 1, NULL );
|
|
regexp_instr ( 'a', 'a', 1, NULL )
|
|
NULL
|
|
SELECT regexp_instr ( 'a', 'a', 1, 0, NULL );
|
|
regexp_instr ( 'a', 'a', 1, 0, NULL )
|
|
NULL
|
|
SELECT regexp_instr ( 'a', 'a', 1, 0, 0, NULL );
|
|
regexp_instr ( 'a', 'a', 1, 0, 0, NULL )
|
|
NULL
|
|
SELECT regexp_like ( 'a', 'a', NULL );
|
|
regexp_like ( 'a', 'a', NULL )
|
|
NULL
|
|
SELECT regexp_replace ( 'a', 'a', 'a', NULL );
|
|
regexp_replace ( 'a', 'a', 'a', NULL )
|
|
NULL
|
|
SELECT regexp_replace ( 'a', 'a', 'a', 1, NULL );
|
|
regexp_replace ( 'a', 'a', 'a', 1, NULL )
|
|
NULL
|
|
SELECT regexp_replace ( 'a', 'a', 'a', 1, 0, NULL );
|
|
regexp_replace ( 'a', 'a', 'a', 1, 0, NULL )
|
|
NULL
|
|
SELECT regexp_substr ( 'a', 'a', NULL );
|
|
regexp_substr ( 'a', 'a', NULL )
|
|
NULL
|
|
SELECT regexp_substr ( 'a', 'a', 1, NULL );
|
|
regexp_substr ( 'a', 'a', 1, NULL )
|
|
NULL
|
|
SELECT regexp_substr ( 'a', 'a', 1, 0, NULL );
|
|
regexp_substr ( 'a', 'a', 1, 0, NULL )
|
|
NULL
|
|
#
|
|
# Bug#27232647: REGEX: ASSERTION FAILED: !STR || STR != M_PTR
|
|
#
|
|
SELECT regexp_like( reverse(''), 123 );
|
|
regexp_like( reverse(''), 123 )
|
|
0
|
|
SELECT regexp_like( soundex(@v1), 'abc' );
|
|
regexp_like( soundex(@v1), 'abc' )
|
|
NULL
|
|
SELECT regexp_like( left('', ''), 'abc' );
|
|
regexp_like( left('', ''), 'abc' )
|
|
0
|
|
Warnings:
|
|
Warning 1292 Truncated incorrect INTEGER value: ''
|
|
Warning 1292 Truncated incorrect INTEGER value: ''
|
|
SELECT regexp_like( repeat(@v1, 'abc'), 'abc' );
|
|
regexp_like( repeat(@v1, 'abc'), 'abc' )
|
|
NULL
|
|
Warnings:
|
|
Warning 1292 Truncated incorrect INTEGER value: 'abc'
|
|
Warning 1292 Truncated incorrect INTEGER value: 'abc'
|
|
#
|
|
# Bug#27597980: ADD ERROR MESSAGE FOR INVALID CAPTURE GROUP NAME
|
|
#
|
|
SELECT regexp_replace( 'abc' , 'abc', '$abc' );
|
|
ERROR HY000: A capture group has an invalid name.
|
|
#
|
|
# Bug#27595368: ASSERTION FAILED: 0 IN ITEM_PARAM::VAL_REAL WITH
|
|
# PREPARED STATEMENT
|
|
#
|
|
SET @s := "SELECT regexp_like( '', '', ? / '' )";
|
|
PREPARE stmt FROM @s;
|
|
EXECUTE stmt;
|
|
ERROR HY000: Incorrect arguments to EXECUTE
|
|
EXECUTE stmt;
|
|
ERROR HY000: Incorrect arguments to EXECUTE
|
|
CREATE TABLE t1 ( match_parameter CHAR(1) );
|
|
INSERT INTO t1 VALUES ( 'i' ), ( 'c' ), ( 'i' ), ( 'c' );
|
|
SELECT match_parameter, regexp_like ( 'a', 'A', match_parameter ) FROM t1;
|
|
match_parameter regexp_like ( 'a', 'A', match_parameter )
|
|
i 1
|
|
c 0
|
|
i 1
|
|
c 0
|
|
DROP TABLE t1;
|
|
#
|
|
# Bug#27612255: VALGRIND WARNING ON INVALID CAPTURE GROUP
|
|
#
|
|
SELECT regexp_replace( ' F' , '^ ', '[,$' );
|
|
ERROR HY000: A capture group has an invalid name.
|
|
#
|
|
# Bug#27751277: ASSERTION FALSE IN REGEXP::CHECK_ICU_STATUS
|
|
#
|
|
SELECT regexp_instr( 'abc', '(?-' );
|
|
ERROR HY000: Invalid match mode flag in regular expression.
|
|
#
|
|
# Bug#27743722 REGEXP::EVALEXPRTOCHARSET:
|
|
# REFERENCE BINDING TO MISALIGNED ADDRESS
|
|
#
|
|
select regexp_instr(char('313:50:35.199734'using utf16le),uuid());
|
|
regexp_instr(char('313:50:35.199734'using utf16le),uuid())
|
|
0
|
|
Warnings:
|
|
Warning 1292 Truncated incorrect INTEGER value: '313:50:35.199734'
|
|
#
|
|
# Bug#27992118: REGEXP_REPLACE ACCUMULATING RESULT
|
|
#
|
|
CREATE TABLE t1 ( a VARCHAR(10) );
|
|
INSERT INTO t1 VALUES ('a a a'), ('b b b'), ('c c c');
|
|
SELECT regexp_replace(a, '^([[:alpha:]]+)[[:space:]].*$', '$1') FROM t1;
|
|
regexp_replace(a, '^([[:alpha:]]+)[[:space:]].*$', '$1')
|
|
a
|
|
b
|
|
c
|
|
DROP TABLE t1;
|
|
#
|
|
# Bug#28027093: REGEXP_REPLACE TRUNCATE UPDATE
|
|
#
|
|
CREATE TABLE t1 ( a CHAR(3) );
|
|
INSERT INTO t1 VALUES ( regexp_replace ('a', 'a', 'x') );
|
|
SELECT * FROM t1;
|
|
a
|
|
x
|
|
UPDATE t1 SET a = regexp_replace ( 'b', 'b', 'y' );
|
|
SELECT * FROM t1;
|
|
a
|
|
y
|
|
DROP TABLE t1;
|
|
CREATE TABLE t1 ( a CHAR(3) );
|
|
INSERT INTO t1 VALUES ( regexp_substr ('a', 'a', 1) );
|
|
SELECT * FROM t1;
|
|
a
|
|
a
|
|
UPDATE t1 SET a = regexp_substr ('b', 'b', 1);
|
|
SELECT * FROM t1;
|
|
a
|
|
b
|
|
DROP TABLE t1;
|
|
#
|
|
# Bug#27682225: WRONG METADATA FOR REGEXP FUNCTIONS
|
|
#
|
|
CREATE TABLE t1 AS SELECT
|
|
regexp_instr( 'a', 'a' ) AS a,
|
|
regexp_like( 'a', 'a' ) AS b,
|
|
regexp_replace( 'abc', 'b', 'x' ) AS c,
|
|
regexp_substr( 'a', 'a' ) AS d,
|
|
regexp_substr( repeat('a', 512), 'a' ) AS e,
|
|
regexp_substr( repeat('a', 513), 'a' ) AS f;
|
|
SHOW CREATE TABLE t1;
|
|
Table Create Table
|
|
t1 CREATE TABLE `t1` (
|
|
`a` bigint(21) NOT NULL DEFAULT '0',
|
|
`b` int(1) NOT NULL DEFAULT '0',
|
|
`c` longtext CHARACTER SET latin1,
|
|
`d` varchar(1) CHARACTER SET latin1 NOT NULL DEFAULT '',
|
|
`e` varchar(512) CHARACTER SET latin1 DEFAULT NULL,
|
|
`f` text CHARACTER SET latin1
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
|
|
SELECT * FROM t1;
|
|
a b c d e f
|
|
1 1 axc a a a
|
|
DROP TABLE t1;
|
|
#
|
|
# Bug#29231490: REGEXP FUNCTIONS FAIL TO USE CODEPOINT POSITIONS
|
|
#
|
|
# Sushi emoji, may not render in your editor.
|
|
SET NAMES DEFAULT;
|
|
SELECT regexp_instr( '🍣🍣a', '🍣', 2 );
|
|
regexp_instr( '??a', '?', 2 )
|
|
2
|
|
SELECT regexp_instr( '🍣🍣a', 'a', 3 );
|
|
regexp_instr( '??a', 'a', 3 )
|
|
3
|
|
SELECT regexp_instr( '🍣🍣a', 'a', 4 );
|
|
ERROR HY000: Index out of bounds in regular expression search.
|
|
SELECT regexp_substr( 'a🍣b', '.', 1 );
|
|
regexp_substr( 'a?b', '.', 1 )
|
|
a
|
|
SELECT regexp_substr( 'a🍣b', '.', 2 );
|
|
regexp_substr( 'a?b', '.', 2 )
|
|
🍣
|
|
SELECT regexp_substr( 'a🍣b', '.', 3 );
|
|
regexp_substr( 'a?b', '.', 3 )
|
|
b
|
|
SELECT regexp_substr( 'a🍣b', '.', 4 );
|
|
regexp_substr( 'a?b', '.', 4 )
|
|
NULL
|
|
SELECT regexp_substr( 'a🍣🍣b', '.', 1 );
|
|
regexp_substr( 'a??b', '.', 1 )
|
|
a
|
|
SELECT regexp_substr( 'a🍣🍣b', '.', 2 );
|
|
regexp_substr( 'a??b', '.', 2 )
|
|
🍣
|
|
SELECT regexp_substr( 'a🍣🍣b', '.', 3 );
|
|
regexp_substr( 'a??b', '.', 3 )
|
|
🍣
|
|
SELECT regexp_substr( 'a🍣🍣b', '.', 4 );
|
|
regexp_substr( 'a??b', '.', 4 )
|
|
b
|
|
SELECT regexp_substr( 'a🍣🍣b', '.', 5 );
|
|
regexp_substr( 'a??b', '.', 5 )
|
|
NULL
|
|
SELECT regexp_replace( '🍣🍣🍣', '.', 'a', 2 );
|
|
regexp_replace( '???', '.', 'a', 2 )
|
|
🍣aa
|
|
SELECT regexp_replace( '🍣🍣🍣', '.', 'a', 2, 2 );
|
|
regexp_replace( '???', '.', 'a', 2, 2 )
|
|
🍣🍣a
|
|
#
|
|
# REGEXP_REPLACE DOES NOT CONVERT RESULT CHARACTER SET
|
|
#
|
|
SELECT hex(regexp_replace( convert( 'abcd' using utf8mb4 ), 'c', ''));
|
|
hex(regexp_replace( convert( 'abcd' using utf8mb4 ), 'c', ''))
|
|
616264
|
|
SELECT hex(regexp_replace( convert( 'abcd' using utf16 ), 'c', ''));
|
|
hex(regexp_replace( convert( 'abcd' using utf16 ), 'c', ''))
|
|
006100620064
|
|
SELECT hex(regexp_substr( convert( 'abcd' using utf8mb4 ), 'abc'));
|
|
hex(regexp_substr( convert( 'abcd' using utf8mb4 ), 'abc'))
|
|
616263
|
|
SELECT hex(regexp_substr( convert( 'abcd' using utf16 ), 'abc'));
|
|
hex(regexp_substr( convert( 'abcd' using utf16 ), 'abc'))
|
|
006100620063
|
|
#
|
|
# Test of the code path that elides character set conversion when the
|
|
# target column has the same character set as ICU produces. This depends
|
|
# on the architecture, and so we try both big and little endian.
|
|
#
|
|
CREATE TABLE t1 (
|
|
a CHAR(10) CHARACTER SET utf16le,
|
|
b CHAR(10) CHARACTER SET utf16
|
|
);
|
|
INSERT INTO t1 VALUES (
|
|
regexp_substr( convert('abcd' using utf16le), 'abc' ),
|
|
regexp_substr( convert('abcd' using utf16), 'abc' ));
|
|
INSERT INTO t1 VALUES (
|
|
regexp_substr( 'abcd', 'abc' ),
|
|
regexp_substr( 'abcd', 'abc' ));
|
|
SELECT * FROM t1;
|
|
a b
|
|
abc abc
|
|
abc abc
|
|
DROP TABLE t1;
|