# # Tests of regular expression functions. # CREATE TABLE t1 ( subject char(10), pattern char(10) ); # # REGEXP_INSTR # # REGEXP_INSTR(), two arguments. SELECT regexp_instr( 'abc', 'a' ); regexp_instr( 'abc', 'a' ) 1 SELECT regexp_instr( 'abc', 'b' ); regexp_instr( 'abc', 'b' ) 2 SELECT regexp_instr( 'abc', 'c' ); regexp_instr( 'abc', 'c' ) 3 SELECT regexp_instr( 'abc', 'd' ); regexp_instr( 'abc', 'd' ) 0 SELECT regexp_instr( NULL, 'a' ); regexp_instr( NULL, 'a' ) NULL SELECT regexp_instr( 'a', NULL ); regexp_instr( 'a', NULL ) NULL SELECT regexp_instr( NULL, NULL ); regexp_instr( NULL, NULL ) NULL # This tests that the correct character set is declared. If we don't # declare it correctly, the UTF-16 sequence will be interpreted as a # two-byte string consisting of the ASCII for '1' and NUL, and the # result will be 3100. SET NAMES binary; SELECT hex( concat(regexp_instr( 'a', 'a' )) ); hex( concat(regexp_instr( 'a', 'a' )) ) 31 SET NAMES DEFAULT; # REGEXP_INSTR(), illegal types. SELECT regexp_instr( 1, 'a' ); regexp_instr( 1, 'a' ) 0 SELECT regexp_instr( 1.1, 'a' ); regexp_instr( 1.1, 'a' ) 0 SELECT regexp_instr( 'a', 1 ); regexp_instr( 'a', 1 ) 0 SELECT regexp_instr( 'a', 1.1 ); regexp_instr( 'a', 1.1 ) 0 SELECT regexp_instr( subject, pattern ) FROM t1; regexp_instr( subject, pattern ) SELECT regexp_instr( 'a', '[[:invalid_bracket_expression:]]' ); ERROR HY000: Illegal argument to a regular expression. # REGEXP_INSTR(), three arguments. # subject, pattern, position. SELECT regexp_instr( 'abcabcabc', 'a+', 1 ); regexp_instr( 'abcabcabc', 'a+', 1 ) 1 SELECT regexp_instr( 'abcabcabc', 'a+', 2 ); regexp_instr( 'abcabcabc', 'a+', 2 ) 4 SELECT regexp_instr( 'abcabcabc', 'b+', 1 ); regexp_instr( 'abcabcabc', 'b+', 1 ) 2 SELECT regexp_instr( 'abcabcabc', 'b+', 2 ); regexp_instr( 'abcabcabc', 'b+', 2 ) 2 SELECT regexp_instr( 'abcabcabc', 'b+', 3 ); regexp_instr( 'abcabcabc', 'b+', 3 ) 5 SELECT regexp_instr( 'a', 'a+', 3 ); ERROR HY000: Index out of bounds in regular expression search. # REGEXP_INSTR(), four arguments. # subject, pattern, position, occurence. SELECT regexp_instr( 'abcabcabc', 'a+', 1, 2 ); regexp_instr( 'abcabcabc', 'a+', 1, 2 ) 4 SELECT regexp_instr( 'abcabcabc', 'a+', 1, 3 ); regexp_instr( 'abcabcabc', 'a+', 1, 3 ) 7 SELECT regexp_instr( 'abcabcabc', 'a+', 1, 4 ); regexp_instr( 'abcabcabc', 'a+', 1, 4 ) 0 SELECT regexp_instr( 'abcabcabc', 'a+', 4, 2 ); regexp_instr( 'abcabcabc', 'a+', 4, 2 ) 7 # REGEXP_INSTR(), five arguments. # subject, pattern, position, occurence, return_option SELECT regexp_instr( 'a', 'a+', 1, 1, 2 ); ERROR HY000: Incorrect arguments to regexp_instr: return_option must be 1 or 0. SELECT regexp_instr( 'a', 'a+', 1, 1, -1 ); ERROR HY000: Incorrect arguments to regexp_instr: return_option must be 1 or 0. SELECT regexp_instr( 'a', 'a+', 1, 1, NULL ); regexp_instr( 'a', 'a+', 1, 1, NULL ) NULL SELECT regexp_instr( 'abcabcabc', 'a+', 1, 1, 0 ); regexp_instr( 'abcabcabc', 'a+', 1, 1, 0 ) 1 SELECT regexp_instr( 'abcabcabc', 'a+', 1, 1, 1 ); regexp_instr( 'abcabcabc', 'a+', 1, 1, 1 ) 2 SELECT regexp_instr( 'aaabcabcabc', 'a+', 1, 1, 1 ); regexp_instr( 'aaabcabcabc', 'a+', 1, 1, 1 ) 4 # REGEXP_INSTR(), six arguments. # subject, pattern, position, occurence, return_option, match_parameter SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'c' ); regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'c' ) 0 SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'i' ); regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'i' ) 4 SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'ci' ); regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'ci' ) 4 SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'cic' ); regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'cic' ) 0 SELECT regexp_instr( 'a', 'a+', 1, 1, 1, 'x' ); ERROR HY000: Incorrect arguments to regexp_instr SELECT regexp_instr( 'a', 'a+', 1, 1, 1, NULL ); regexp_instr( 'a', 'a+', 1, 1, 1, NULL ) NULL # # REGEXP_LIKE # # REGEXP_LIKE(), two arguments. SELECT regexp_like( 'abc', 'a' ); regexp_like( 'abc', 'a' ) 1 SELECT regexp_like( 'abc', 'b' ); regexp_like( 'abc', 'b' ) 1 SELECT regexp_like( 'abc', 'c' ); regexp_like( 'abc', 'c' ) 1 SELECT regexp_like( 'abc', 'd' ); regexp_like( 'abc', 'd' ) 0 SELECT regexp_like( 'a', 'a.*' ); regexp_like( 'a', 'a.*' ) 1 SELECT regexp_like( 'ab', 'a.*' ); regexp_like( 'ab', 'a.*' ) 1 SELECT regexp_like( NULL, 'a' ); regexp_like( NULL, 'a' ) NULL SELECT regexp_like( 'a', NULL ); regexp_like( 'a', NULL ) NULL SELECT regexp_like( NULL, NULL ); regexp_like( NULL, NULL ) NULL # This tests that the correct character set is declared. If we don't # declare it correctly, the UTF-16 sequence will be interpreted as a # two-byte string consisting of the ASCII for '1' and NUL, and the # result will be 3100. SET NAMES binary; SELECT hex( concat(regexp_like( 'a', 'a' )) ); hex( concat(regexp_like( 'a', 'a' )) ) 31 SET NAMES DEFAULT; # REGEXP_LIKE(), three arguments. SELECT regexp_like( 'abc', 'A', 'i' ); regexp_like( 'abc', 'A', 'i' ) 1 SELECT regexp_like( 'abc', 'A', 'c' ); regexp_like( 'abc', 'A', 'c' ) 0 SELECT regexp_like( 'a', 'a+', 'x' ); ERROR HY000: Incorrect arguments to regexp_like SELECT regexp_like( 'a', 'a+', 'cmnux' ); ERROR HY000: Incorrect arguments to regexp_like SELECT regexp_like( 'a', 'a+', NULL ); regexp_like( 'a', 'a+', NULL ) NULL # REGEXP_LIKE(), illegal types. SELECT regexp_like( 1, 'a' ); regexp_like( 1, 'a' ) 0 SELECT regexp_like( 1.1, 'a' ); regexp_like( 1.1, 'a' ) 0 SELECT regexp_like( 'a', 1 ); regexp_like( 'a', 1 ) 0 SELECT regexp_like( 'a', 1.1 ); regexp_like( 'a', 1.1 ) 0 SELECT regexp_like('a', '[[:invalid_bracket_expression:]]'); ERROR HY000: Illegal argument to a regular expression. # # REGEXP_REPLACE # # REGEXP_REPLACE(), three arguments. SELECT regexp_replace( 'aaa', 'a', 'X' ); regexp_replace( 'aaa', 'a', 'X' ) XXX SELECT regexp_replace( 'abc', 'b', 'X' ); regexp_replace( 'abc', 'b', 'X' ) aXc SELECT regexp_replace( NULL, 'a', 'X' ); regexp_replace( NULL, 'a', 'X' ) NULL SELECT regexp_replace( 'aaa', NULL, 'X' ); regexp_replace( 'aaa', NULL, 'X' ) NULL SELECT regexp_replace( 'aaa', 'a', NULL ); regexp_replace( 'aaa', 'a', NULL ) NULL # This tests that the correct character set is declared. If we don't # declare it correctly, the UTF-16 sequence will be interpreted as a # zero-terminated string consisting of 'X', and the # result will thus be 'X'. SELECT concat( regexp_replace( 'aaa', 'a', 'X' ), 'x' ); concat( regexp_replace( 'aaa', 'a', 'X' ), 'x' ) XXXx # REGEXP_REPLACE(), four arguments. SELECT regexp_replace( 'aaa', 'a', 'X', 0 ); ERROR 42000: Incorrect parameters in the call to native function 'regexp_replace' SELECT regexp_replace( 'aaa', 'a', 'X', 1 ); regexp_replace( 'aaa', 'a', 'X', 1 ) XXX SELECT regexp_replace( 'a', 'a+', 'b', 3 ); ERROR HY000: Index out of bounds in regular expression search. # REGEXP_REPLACE(), five arguments. SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 0, 1 ); ERROR 42000: Incorrect parameters in the call to native function 'regexp_replace' SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 1 ); regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 1 ) aaaXccbbddaa SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 2 ); regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 2 ) aaabbccXddaa SELECT regexp_replace( 'aaabbccbbddaa', '(b+)', '<$1>', 1, 2 ); regexp_replace( 'aaabbccbbddaa', '(b+)', '<$1>', 1, 2 ) aaabbccddaa SELECT regexp_replace( 'aaabbccbbddaa', 'x+', 'x', 1, 0 ); regexp_replace( 'aaabbccbbddaa', 'x+', 'x', 1, 0 ) aaabbccbbddaa SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'x', 1, 0 ); regexp_replace( 'aaabbccbbddaa', 'b+', 'x', 1, 0 ) aaaxccxddaa SELECT regexp_replace( 'aaab', 'b', 'x', 1, 2 ); regexp_replace( 'aaab', 'b', 'x', 1, 2 ) aaab SELECT regexp_replace( 'aaabccc', 'b', 'x', 1, 2 ); regexp_replace( 'aaabccc', 'b', 'x', 1, 2 ) aaabccc SELECT regexp_replace( 'abc', 'b', 'X' ); regexp_replace( 'abc', 'b', 'X' ) aXc SELECT regexp_replace( 'abcbdb', 'b', 'X' ); regexp_replace( 'abcbdb', 'b', 'X' ) aXcXdX SELECT regexp_replace( 'abcbdb', 'b', 'X', 3 ); regexp_replace( 'abcbdb', 'b', 'X', 3 ) abcXdX SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 1 ); regexp_replace( 'aaabcbdb', 'b', 'X', 1 ) aaaXcXdX SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 2 ); regexp_replace( 'aaabcbdb', 'b', 'X', 2 ) aaaXcXdX SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 3 ); regexp_replace( 'aaabcbdb', 'b', 'X', 3 ) aaaXcXdX SELECT regexp_replace( 'a', '[[:invalid_bracket_expression:]]', '$1' ); ERROR HY000: Illegal argument to a regular expression. # # Test of the dynamic buffer in REGEXP_REPLACE. # SELECT regexp_replace( 'aaa', 'a', 'X', 2 ); regexp_replace( 'aaa', 'a', 'X', 2 ) aXX SELECT regexp_replace( 'aaa', 'a', 'XX', 2 ); regexp_replace( 'aaa', 'a', 'XX', 2 ) aXXXX # # REGEXP_SUBSTR # SELECT regexp_substr( 'a' ); ERROR 42000: Incorrect parameter count in the call to native function 'regexp_substr' SELECT regexp_substr( 'a', 'b', 'c', 'd', 'e', 'f' ); ERROR 42000: Incorrect parameter count in the call to native function 'regexp_substr' # REGEXP_SUBSTR(), two arguments. SELECT regexp_substr( 'ab ac ad', '.d' ); regexp_substr( 'ab ac ad', '.d' ) ad SELECT regexp_substr( 'ab ac ad', '.D' ); regexp_substr( 'ab ac ad', '.D' ) ad # This tests that the correct character set is declared. If we don't # declare it correctly, the UTF-16 sequence will be interpreted as a # zero-terminated string consisting of 'a', and the # result will thus be 'a'. SELECT concat( regexp_substr( 'aaa', 'a+' ), 'x' ); concat( regexp_substr( 'aaa', 'a+' ), 'x' ) aaax # REGEXP_SUBSTR(), three arguments. SELECT regexp_substr( 'ab ac ad', 'a.', 0 ); ERROR 42000: Incorrect parameters in the call to native function 'regexp_substr' SELECT regexp_substr( 'ab ac ad', 'A.', 3 ); regexp_substr( 'ab ac ad', 'A.', 3 ) ac SELECT regexp_substr( 'ab ac ad', 'A.', 3, 1 ); regexp_substr( 'ab ac ad', 'A.', 3, 1 ) ac SELECT regexp_substr( 'ab ac ad', 'A.', 3, 2 ); regexp_substr( 'ab ac ad', 'A.', 3, 2 ) ad SELECT regexp_substr( 'ab ac ad', 'A.', 3, 3 ); regexp_substr( 'ab ac ad', 'A.', 3, 3 ) NULL SELECT regexp_substr( 'ab ac ad', 'A.', 3, 3 ) IS NULL; regexp_substr( 'ab ac ad', 'A.', 3, 3 ) IS NULL 0 # REGEXP_SUBSTR(), four arguments. SELECT regexp_substr( 'ab ac ad', 'A.', 1, 1, 'c' ); regexp_substr( 'ab ac ad', 'A.', 1, 1, 'c' ) NULL SELECT regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'i' ); regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'i' ) ab SELECT regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'im' ); regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'im' ) ab SELECT regexp_substr('a', '[[:invalid_bracket_expression:]]'); ERROR HY000: Illegal argument to a regular expression. SET sql_mode = ''; CREATE TABLE t2 ( g GEOMETRY NOT NULL ); INSERT INTO t2 VALUES ( POINT(1,2) ); SELECT concat( regexp_like(g, g), 'x' ) FROM t2; concat( regexp_like(g, g), 'x' ) 0x SET sql_mode = DEFAULT; DROP TABLE t2; # DROP TABLE t1; # # Error handling. # # Error handling: Stack limit. # The following queries are from bug#24449090 SELECT regexp_instr( '', '((((((((){80}){}){11}){11}){11}){80}){11}){4}' ); ERROR HY000: Incorrect description of a {min,max} interval. # Query and result logs turned off from here ... # ... to here. The reason is that we're testing the stack limit feature, # which is not present (or working) in ICU 5.0 or earlier. We don't need # the query or result/error log to test the feature, though. The above # tests still fail unless the proper error is thrown. # # Character set conversion. # SET NAMES latin1; SELECT regexp_instr( _latin1 x'61F662', _latin1 x'F6' ); regexp_instr( _latin1 x'61F662', _latin1 x'F6' ) 2 SELECT regexp_instr( _latin1 x'61F662', _utf8mb4'ö' ); regexp_instr( _latin1 x'61F662', _utf8mb4'ö' ) 2 SELECT regexp_instr( concat('a', _utf8mb4 x'F09F8DA3'), _utf8mb4 x'F09F8DA3' ); regexp_instr( concat('a', _utf8mb4 x'F09F8DA3'), _utf8mb4 x'F09F8DA3' ) 2 SELECT regexp_instr( _utf8mb4'aöb', _utf8mb4'ö' ); regexp_instr( _utf8mb4'aöb', _utf8mb4'ö' ) 2 SET NAMES utf8; Warnings: Warning 3719 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. SELECT regexp_instr( 'aöb', 'ö' ); regexp_instr( 'aöb', 'ö' ) 2 SET NAMES DEFAULT; # # Bug#30241: Regular expression problems # SET NAMES utf8; Warnings: Warning 3719 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. SELECT regexp_instr( 'אב רק', /*k*/'^[^ב]' ); regexp_instr( 'אב רק', /*k*/'^[^ב]' ) 1 PREPARE stmt1 FROM "select 'a' rlike ?"; DEALLOCATE PREPARE stmt1; CREATE TABLE t1( a INT, subject CHAR(10) ); CREATE TABLE t2( pattern CHAR(10) ); insert into t1 values (0, 'apa'); insert into t2 values ('apa'); CREATE DEFINER=root@localhost PROCEDURE p1() BEGIN UPDATE t1, t2 SET a = 1 WHERE regexp_like(t1.subject, t2.pattern); END|| CALL p1(); DROP PROCEDURE p1; DROP TABLE t1, t2; CREATE TABLE t1 ( a INT ); EXPLAIN SELECT 1 FROM t1 WHERE 1 REGEXP (1 IN (SELECT 1 FROM t1)); id select_type table partitions type possible_keys key key_len ref rows filtered Extra 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE 2 SUBQUERY t1 NULL ALL NULL NULL NULL NULL 1 100.00 NULL Warnings: Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t1` where false DROP TABLE t1; PREPARE stmt1 FROM "SELECT regexp_like( 'a', ? )"; PREPARE stmt2 FROM "SELECT regexp_like( ?, 'a' )"; PREPARE stmt3 FROM "SELECT regexp_like( ?, ? )"; SET @subject = 'a'; SET @pattern = 'a+'; EXECUTE stmt1 USING @pattern; regexp_like( 'a', ? ) 1 EXECUTE stmt2 USING @subject; regexp_like( ?, 'a' ) 1 EXECUTE stmt3 USING @subject, @pattern; regexp_like( ?, ? ) 1 # # Yes, you can circumvent the type checking using prepared statements. # SET @subject = 1; SET @pattern = 1; EXECUTE stmt1 USING @pattern; regexp_like( 'a', ? ) 0 EXECUTE stmt2 USING @subject; regexp_like( ?, 'a' ) 0 EXECUTE stmt3 USING @subject, @pattern; regexp_like( ?, ? ) 1 DEALLOCATE PREPARE stmt1; DEALLOCATE PREPARE stmt2; DEALLOCATE PREPARE stmt3; # Let's make sure we handle arguments that raise errors when evaluated. DO 1 rlike multilinestring(point(1, 1)); ERROR HY000: Incorrect arguments to multilinestring # # Test of repeated use of one matcher. # CREATE TABLE t1 ( a CHAR(10) ); INSERT INTO t1 VALUES ( 'abc' ), ( 'bcd' ), ( 'cde' ); SELECT regexp_like( a, 'a' ) FROM t1; regexp_like( a, 'a' ) 1 0 0 DROP TABLE t1; CREATE TABLE t1 ( a CHAR ( 10 ), b CHAR ( 10 ) ); INSERT INTO t1 VALUES( NULL, 'abc' ); INSERT INTO t1 VALUES( 'def', NULL ); SELECT a, b, regexp_like( a, b ) FROM t1; a b regexp_like( a, b ) NULL abc NULL def NULL NULL DROP TABLE t1; # # Generated columns. # CREATE TABLE t1 ( c CHAR(10) CHARSET latin1 COLLATE latin1_bin, c_ci CHAR(10) CHARSET latin1 COLLATE latin1_general_ci, c_cs CHAR(10) CHARSET latin1 COLLATE latin1_general_cs ); INSERT INTO t1 VALUES ( 'a', 'a', 'a' ), ( 'A', 'A', 'A' ), ( 'b', 'b', 'b' ); SELECT c, c_ci REGEXP 'A', c_cs REGEXP 'A' FROM t1; c c_ci REGEXP 'A' c_cs REGEXP 'A' A 1 1 a 1 0 b 0 0 DROP TABLE t1; SELECT regexp_like( _utf8mb4 'ss' COLLATE utf8mb4_german2_ci, _utf8mb4 'ß' COLLATE utf8mb4_german2_ci ); regexp_like( _utf8mb4 'ss' COLLATE utf8mb4_german2_ci, _utf8mb4 'ß' COLLATE utf8mb4_german2_ci ) 1 SELECT regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_german2_ci, _utf8mb4 'ss' ); regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_german2_ci, _utf8mb4 'ss' ) 1 SELECT regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_de_pb_0900_as_cs, _utf8mb4 'ss' ); regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_de_pb_0900_as_cs, _utf8mb4 'ss' ) 0 # # Regression testing. # SET NAMES latin1; SELECT regexp_like( 'a', 'A' COLLATE latin1_general_ci ); regexp_like( 'a', 'A' COLLATE latin1_general_ci ) 1 SELECT 'a' REGEXP 'A' COLLATE latin1_general_ci; 'a' REGEXP 'A' COLLATE latin1_general_ci 1 SELECT regexp_like( 'a', 'A' COLLATE latin1_general_cs ); regexp_like( 'a', 'A' COLLATE latin1_general_cs ) 0 SELECT 'a' REGEXP 'A' COLLATE latin1_general_cs; 'a' REGEXP 'A' COLLATE latin1_general_cs 0 # The default behaviour, multiline match is off: SELECT regexp_like( 'a\nb\nc', '^b$' ); regexp_like( 'a\nb\nc', '^b$' ) 0 # Enabling the multiline option using the PCRE option syntax: # (Previously not accepted) SELECT regexp_like( 'a\nb\nc', '(?m)^b$' ); regexp_like( 'a\nb\nc', '(?m)^b$' ) 1 # Dot matches newline: SELECT regexp_like( 'a\nb\nc', '.*' ); regexp_like( 'a\nb\nc', '.*' ) 1 SELECT regexp_like( _utf16 'a' , 'a' ); regexp_like( _utf16 'a' , 'a' ) 1 SELECT regexp_like( _utf16le 'a' , 'a' ); regexp_like( _utf16le 'a' , 'a' ) 0 # Make sure we exit on error. SELECT regexp_like( 'aaa', 'a+', 1 ); ERROR HY000: Incorrect arguments to regexp_like SELECT regexp_substr( 'aaa', 'a+', 1, 1, 1 ); ERROR HY000: Incorrect arguments to regexp_substr SELECT regexp_substr( 'aaa', 'a+', 1, 1, 1 ); ERROR HY000: Incorrect arguments to regexp_substr SELECT regexp_substr( 'aaa', '+' ); ERROR HY000: Syntax error in regular expression on line 1, character 1. # # Test of a table where the columns are already in the lib's charset and # contain NULL. # CREATE TABLE t1 ( a CHAR(3) CHARACTER SET utf16le, b CHAR(3) CHARACTER SET utf16le ); INSERT INTO t1 VALUES ( NULL, 'abc' ); INSERT INTO t1 VALUES ( 'def', NULL ); INSERT INTO t1 VALUES ( NULL, NULL ); SELECT a regexp b FROM t1; a regexp b NULL NULL NULL DROP TABLE t1; # # Typecasts. # CREATE TABLE t1 ( a REAL, b INT, c CHAR(100), d DECIMAL ); INSERT INTO t1 VALUES ( regexp_instr('a', 'a'), regexp_instr('a', 'a'), regexp_instr('a', 'a'), regexp_instr('a', 'a') ); SELECT * FROM t1; a b c d 1 1 1 1 DELETE FROM t1; INSERT INTO t1 VALUES ( regexp_like('a', 'a'), regexp_like('a', 'a'), regexp_like('a', 'a'), regexp_like('a', 'a') ); SELECT * FROM t1; a b c d 1 1 1 1 DELETE FROM t1; INSERT INTO t1 ( a ) VALUES ( regexp_replace('a', 'a', 'a') ); ERROR 01000: Data truncated for column 'a' at row 1 INSERT INTO t1 ( b ) VALUES ( regexp_replace('a', 'a', 'a') ); ERROR HY000: Incorrect integer value: 'a' for column 'b' at row 1 INSERT INTO t1 ( c ) VALUES ( regexp_replace('a', 'a', 'a') ); INSERT INTO t1 ( d ) VALUES ( regexp_replace('a', 'a', 'a') ); ERROR HY000: Incorrect decimal value: 'a' for column 'd' at row 1 SELECT * FROM t1; a b c d NULL NULL a NULL DELETE FROM t1; INSERT INTO t1 ( a ) VALUES ( regexp_substr('a', 'a') ); ERROR 01000: Data truncated for column 'a' at row 1 INSERT INTO t1 ( b ) VALUES ( regexp_substr('a', 'a') ); ERROR HY000: Incorrect integer value: 'a' for column 'b' at row 1 INSERT INTO t1 ( c ) VALUES ( regexp_substr('a', 'a') ); INSERT INTO t1 ( d ) VALUES ( regexp_substr('a', 'a') ); ERROR HY000: Incorrect decimal value: 'a' for column 'd' at row 1 SELECT * FROM t1; a b c d NULL NULL a NULL DROP TABLE t1; # At the time of writing, val_int() is not called when inserting into an # INT column. SELECT cast( regexp_replace('a', 'a', 'a') AS SIGNED INTEGER ); cast( regexp_replace('a', 'a', 'a') AS SIGNED INTEGER ) 0 Warnings: Warning 1292 Truncated incorrect INTEGER value: 'a' SELECT cast( regexp_substr ('a', 'a') AS SIGNED INTEGER ); cast( regexp_substr ('a', 'a') AS SIGNED INTEGER ) 0 Warnings: Warning 1292 Truncated incorrect INTEGER value: 'a' # Casting to DATETIME/TIME # Due to the class hierarchy of function objects, these have to be # copy-pasted. SELECT cast( regexp_instr ('a', 'a' ) AS DATETIME ); cast( regexp_instr ('a', 'a' ) AS DATETIME ) NULL Warnings: Warning 1292 Incorrect datetime value: '1' SELECT cast( regexp_like ('a', 'a' ) AS DATETIME ); cast( regexp_like ('a', 'a' ) AS DATETIME ) NULL Warnings: Warning 1292 Incorrect datetime value: '1' SELECT cast( regexp_replace('a', 'a', 'a') AS DATETIME ); cast( regexp_replace('a', 'a', 'a') AS DATETIME ) NULL Warnings: Warning 1292 Incorrect datetime value: 'a' SELECT cast( regexp_substr ('a', 'a' ) AS DATETIME ); cast( regexp_substr ('a', 'a' ) AS DATETIME ) NULL Warnings: Warning 1292 Incorrect datetime value: 'a' SELECT cast( regexp_instr ('a', 'a' ) AS TIME ); cast( regexp_instr ('a', 'a' ) AS TIME ) 00:00:01 SELECT cast( regexp_like ('a', 'a' ) AS TIME ); cast( regexp_like ('a', 'a' ) AS TIME ) 00:00:01 SELECT cast( regexp_replace('a', 'a', 'a') AS TIME ); cast( regexp_replace('a', 'a', 'a') AS TIME ) NULL Warnings: Warning 1292 Truncated incorrect time value: 'a' SELECT cast( regexp_substr ('a', 'a' ) AS TIME ); cast( regexp_substr ('a', 'a' ) AS TIME ) NULL Warnings: Warning 1292 Truncated incorrect time value: 'a' # # Error messages. # Test of the error messages themselves. # SET GLOBAL net_buffer_length = 1024; SET GLOBAL max_allowed_packet = @@global.net_buffer_length; SELECT @@global.max_allowed_packet; @@global.max_allowed_packet 1024 # We need to start a new session in order for the changes to the session # version of max_allowed_packet to take effect. # This is now the replacement buffer size in UTF-16 characters. SET @buf_sz_utf16 = @@global.max_allowed_packet / length( _utf16'x' ); SELECT @buf_sz_utf16; @buf_sz_utf16 512.000000000 SELECT length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'b' )); length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'b' )) 512 SELECT length(regexp_replace( repeat('a', @buf_sz_utf16 + 1), 'a', 'b' )); ERROR HY000: The result string is larger than the result buffer. SELECT length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'bb' )); ERROR HY000: The result string is larger than the result buffer. SET GLOBAL net_buffer_length = DEFAULT; Warnings: Warning 1708 The value of 'max_allowed_packet' should be no less than the value of 'net_buffer_length' SET GLOBAL max_allowed_packet = DEFAULT; SELECT regexp_like( 'a', '[[:<:]]a' ); ERROR HY000: Illegal argument to a regular expression. SELECT regexp_like( 'a', ' **' ); ERROR HY000: Syntax error in regular expression on line 1, character 5. SELECT regexp_like( 'a', ' \n **' ); ERROR HY000: Syntax error in regular expression on line 2, character 4. SELECT regexp_like( 'a', ' +++' ); ERROR HY000: Syntax error in regular expression on line 1, character 5. SELECT regexp_like( 'a', '\\' ); ERROR HY000: Unrecognized escape sequence in regular expression. SELECT regexp_like('a','(?{})'); ERROR HY000: The regular expression contains a feature that is not implemented in this library version. SELECT regexp_like('a','('); ERROR HY000: Mismatched parenthesis in regular expression. SELECT regexp_like('a','a{}'); ERROR HY000: Incorrect description of a {min,max} interval. SELECT regexp_like('a','a{2,1}'); ERROR HY000: The maximum is less than the minumum in a {min,max} interval. SELECT regexp_like('a','\\1'); ERROR HY000: Invalid back-reference in regular expression. SELECT regexp_substr( 'ab','(?<=a+)b' ); ERROR HY000: The look-behind assertion exceeds the limit in regular expression. SELECT regexp_like( 'a', 'a[' ); ERROR HY000: The regular expression contains an unclosed bracket expression. SELECT regexp_substr( 'ab','[b-a]' ); ERROR HY000: The regular expression contains an [x-y] character range where x comes after y. # # Test that the replacement buffer can grow beyond the maximum VARCHAR # column length. # CREATE TABLE t1 ( a TEXT ); INSERT INTO t1 VALUES ( repeat( 'a', 16384 ) ); SELECT char_length ( regexp_replace( a, 'a', 'b' ) ) FROM t1; char_length ( regexp_replace( a, 'a', 'b' ) ) 16384 SET GLOBAL regexp_time_limit = 10000; SELECT regexp_like ( regexp_replace( a, 'a', 'b' ), 'b{16384}' ) FROM t1; regexp_like ( regexp_replace( a, 'a', 'b' ), 'b{16384}' ) 1 SET GLOBAL regexp_time_limit = DEFAULT; DROP TABLE t1; # # Bug#27134570: DOS: REGEXP TAKES EXPONENTIALLY LONGER, CAN'T BE KILLED, # HOGS CPU # DO '1' regexp repeat('$', 50000000); ERROR HY000: The regular expression pattern exceeds limits on size or complexity. # # Bug#27140286: REGEXP, ASSERTION FAILED: !THD->IS_ERROR() IN # SETUP_FIELDS() # DO ( (@b) regexp (cot (unhex ( 1 )) ) ); ERROR 22003: DOUBLE value is out of range in 'cot(unhex(1))' DO ( (@c) rlike (cot ( (!( @f )) )) ); Warnings: Warning 1287 '!' is deprecated and will be removed in a future release. Please use NOT instead DO ( ('') rlike (cot ( ' %' )) ); ERROR 22003: DOUBLE value is out of range in 'cot(' %')' DO ( (-28277) regexp (period_add ( -10966, 1381205734 )) ); ERROR HY000: Incorrect arguments to period_add DO ( (( @f )) rlike (json_depth ( 'key4' )) ); ERROR 22032: Invalid JSON text in argument 1 to function json_depth: "Invalid value." at position 0. DO ( ('- ') regexp (cot ( right (':#.', 33) )) ); ERROR 22003: DOUBLE value is out of range in 'cot(right(':#.',33))' DO ( (1) regexp (exp ( 64826 )) ); ERROR 22003: DOUBLE value is out of range in 'exp(64826)' DO ( (@g) regexp (cot ( @f )) ); DO ( (@b) regexp (exp ( 0x1fc5574c )) ); ERROR 22003: DOUBLE value is out of range in 'exp(0x1fc5574c)' DO ( (25091) rlike (exp ( 14373 )) ); ERROR 22003: DOUBLE value is out of range in 'exp(14373)' # # Bug#27183583: REGEXP OPERATIONS CANT BE KILLED # SET GLOBAL regexp_time_limit = 1000000; connect conn1, localhost, root; SELECT regexp_instr('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC', '(A+)+B'); connection default; KILL QUERY ; connection conn1; ERROR 70100: Query execution was interrupted disconnect conn1; connection default; SET GLOBAL regexp_time_limit = DEFAULT; # # Bug#27252630: REGEXP FUNCTIONS IGNORE NULLS # SELECT regexp_instr ( 'a', 'a', NULL ); regexp_instr ( 'a', 'a', NULL ) NULL SELECT regexp_instr ( 'a', 'a', 1, NULL ); regexp_instr ( 'a', 'a', 1, NULL ) NULL SELECT regexp_instr ( 'a', 'a', 1, 0, NULL ); regexp_instr ( 'a', 'a', 1, 0, NULL ) NULL SELECT regexp_instr ( 'a', 'a', 1, 0, 0, NULL ); regexp_instr ( 'a', 'a', 1, 0, 0, NULL ) NULL SELECT regexp_like ( 'a', 'a', NULL ); regexp_like ( 'a', 'a', NULL ) NULL SELECT regexp_replace ( 'a', 'a', 'a', NULL ); regexp_replace ( 'a', 'a', 'a', NULL ) NULL SELECT regexp_replace ( 'a', 'a', 'a', 1, NULL ); regexp_replace ( 'a', 'a', 'a', 1, NULL ) NULL SELECT regexp_replace ( 'a', 'a', 'a', 1, 0, NULL ); regexp_replace ( 'a', 'a', 'a', 1, 0, NULL ) NULL SELECT regexp_substr ( 'a', 'a', NULL ); regexp_substr ( 'a', 'a', NULL ) NULL SELECT regexp_substr ( 'a', 'a', 1, NULL ); regexp_substr ( 'a', 'a', 1, NULL ) NULL SELECT regexp_substr ( 'a', 'a', 1, 0, NULL ); regexp_substr ( 'a', 'a', 1, 0, NULL ) NULL # # Bug#27232647: REGEX: ASSERTION FAILED: !STR || STR != M_PTR # SELECT regexp_like( reverse(''), 123 ); regexp_like( reverse(''), 123 ) 0 SELECT regexp_like( soundex(@v1), 'abc' ); regexp_like( soundex(@v1), 'abc' ) NULL SELECT regexp_like( left('', ''), 'abc' ); regexp_like( left('', ''), 'abc' ) 0 Warnings: Warning 1292 Truncated incorrect INTEGER value: '' Warning 1292 Truncated incorrect INTEGER value: '' SELECT regexp_like( repeat(@v1, 'abc'), 'abc' ); regexp_like( repeat(@v1, 'abc'), 'abc' ) NULL Warnings: Warning 1292 Truncated incorrect INTEGER value: 'abc' Warning 1292 Truncated incorrect INTEGER value: 'abc' # # Bug#27597980: ADD ERROR MESSAGE FOR INVALID CAPTURE GROUP NAME # SELECT regexp_replace( 'abc' , 'abc', '$abc' ); ERROR HY000: A capture group has an invalid name. # # Bug#27595368: ASSERTION FAILED: 0 IN ITEM_PARAM::VAL_REAL WITH # PREPARED STATEMENT # SET @s := "SELECT regexp_like( '', '', ? / '' )"; PREPARE stmt FROM @s; EXECUTE stmt; ERROR HY000: Incorrect arguments to EXECUTE EXECUTE stmt; ERROR HY000: Incorrect arguments to EXECUTE CREATE TABLE t1 ( match_parameter CHAR(1) ); INSERT INTO t1 VALUES ( 'i' ), ( 'c' ), ( 'i' ), ( 'c' ); SELECT match_parameter, regexp_like ( 'a', 'A', match_parameter ) FROM t1; match_parameter regexp_like ( 'a', 'A', match_parameter ) i 1 c 0 i 1 c 0 DROP TABLE t1; # # Bug#27612255: VALGRIND WARNING ON INVALID CAPTURE GROUP # SELECT regexp_replace( ' F' , '^ ', '[,$' ); ERROR HY000: A capture group has an invalid name. # # Bug#27751277: ASSERTION FALSE IN REGEXP::CHECK_ICU_STATUS # SELECT regexp_instr( 'abc', '(?-' ); ERROR HY000: Invalid match mode flag in regular expression. # # Bug#27743722 REGEXP::EVALEXPRTOCHARSET: # REFERENCE BINDING TO MISALIGNED ADDRESS # select regexp_instr(char('313:50:35.199734'using utf16le),uuid()); regexp_instr(char('313:50:35.199734'using utf16le),uuid()) 0 Warnings: Warning 1292 Truncated incorrect INTEGER value: '313:50:35.199734' # # Bug#27992118: REGEXP_REPLACE ACCUMULATING RESULT # CREATE TABLE t1 ( a VARCHAR(10) ); INSERT INTO t1 VALUES ('a a a'), ('b b b'), ('c c c'); SELECT regexp_replace(a, '^([[:alpha:]]+)[[:space:]].*$', '$1') FROM t1; regexp_replace(a, '^([[:alpha:]]+)[[:space:]].*$', '$1') a b c DROP TABLE t1; # # Bug#28027093: REGEXP_REPLACE TRUNCATE UPDATE # CREATE TABLE t1 ( a CHAR(3) ); INSERT INTO t1 VALUES ( regexp_replace ('a', 'a', 'x') ); SELECT * FROM t1; a x UPDATE t1 SET a = regexp_replace ( 'b', 'b', 'y' ); SELECT * FROM t1; a y DROP TABLE t1; CREATE TABLE t1 ( a CHAR(3) ); INSERT INTO t1 VALUES ( regexp_substr ('a', 'a', 1) ); SELECT * FROM t1; a a UPDATE t1 SET a = regexp_substr ('b', 'b', 1); SELECT * FROM t1; a b DROP TABLE t1; # # Bug#27682225: WRONG METADATA FOR REGEXP FUNCTIONS # CREATE TABLE t1 AS SELECT regexp_instr( 'a', 'a' ) AS a, regexp_like( 'a', 'a' ) AS b, regexp_replace( 'abc', 'b', 'x' ) AS c, regexp_substr( 'a', 'a' ) AS d, regexp_substr( repeat('a', 512), 'a' ) AS e, regexp_substr( repeat('a', 513), 'a' ) AS f; SHOW CREATE TABLE t1; Table Create Table t1 CREATE TABLE `t1` ( `a` bigint(21) NOT NULL DEFAULT '0', `b` int(1) NOT NULL DEFAULT '0', `c` longtext CHARACTER SET latin1, `d` varchar(1) CHARACTER SET latin1 NOT NULL DEFAULT '', `e` varchar(512) CHARACTER SET latin1 DEFAULT NULL, `f` text CHARACTER SET latin1 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci SELECT * FROM t1; a b c d e f 1 1 axc a a a DROP TABLE t1; # # Bug#29231490: REGEXP FUNCTIONS FAIL TO USE CODEPOINT POSITIONS # # Sushi emoji, may not render in your editor. SET NAMES DEFAULT; SELECT regexp_instr( '🍣🍣a', '🍣', 2 ); regexp_instr( '??a', '?', 2 ) 2 SELECT regexp_instr( '🍣🍣a', 'a', 3 ); regexp_instr( '??a', 'a', 3 ) 3 SELECT regexp_instr( '🍣🍣a', 'a', 4 ); ERROR HY000: Index out of bounds in regular expression search. SELECT regexp_substr( 'a🍣b', '.', 1 ); regexp_substr( 'a?b', '.', 1 ) a SELECT regexp_substr( 'a🍣b', '.', 2 ); regexp_substr( 'a?b', '.', 2 ) 🍣 SELECT regexp_substr( 'a🍣b', '.', 3 ); regexp_substr( 'a?b', '.', 3 ) b SELECT regexp_substr( 'a🍣b', '.', 4 ); regexp_substr( 'a?b', '.', 4 ) NULL SELECT regexp_substr( 'a🍣🍣b', '.', 1 ); regexp_substr( 'a??b', '.', 1 ) a SELECT regexp_substr( 'a🍣🍣b', '.', 2 ); regexp_substr( 'a??b', '.', 2 ) 🍣 SELECT regexp_substr( 'a🍣🍣b', '.', 3 ); regexp_substr( 'a??b', '.', 3 ) 🍣 SELECT regexp_substr( 'a🍣🍣b', '.', 4 ); regexp_substr( 'a??b', '.', 4 ) b SELECT regexp_substr( 'a🍣🍣b', '.', 5 ); regexp_substr( 'a??b', '.', 5 ) NULL SELECT regexp_replace( '🍣🍣🍣', '.', 'a', 2 ); regexp_replace( '???', '.', 'a', 2 ) 🍣aa SELECT regexp_replace( '🍣🍣🍣', '.', 'a', 2, 2 ); regexp_replace( '???', '.', 'a', 2, 2 ) 🍣🍣a # # REGEXP_REPLACE DOES NOT CONVERT RESULT CHARACTER SET # SELECT hex(regexp_replace( convert( 'abcd' using utf8mb4 ), 'c', '')); hex(regexp_replace( convert( 'abcd' using utf8mb4 ), 'c', '')) 616264 SELECT hex(regexp_replace( convert( 'abcd' using utf16 ), 'c', '')); hex(regexp_replace( convert( 'abcd' using utf16 ), 'c', '')) 006100620064 SELECT hex(regexp_substr( convert( 'abcd' using utf8mb4 ), 'abc')); hex(regexp_substr( convert( 'abcd' using utf8mb4 ), 'abc')) 616263 SELECT hex(regexp_substr( convert( 'abcd' using utf16 ), 'abc')); hex(regexp_substr( convert( 'abcd' using utf16 ), 'abc')) 006100620063 # # Test of the code path that elides character set conversion when the # target column has the same character set as ICU produces. This depends # on the architecture, and so we try both big and little endian. # CREATE TABLE t1 ( a CHAR(10) CHARACTER SET utf16le, b CHAR(10) CHARACTER SET utf16 ); INSERT INTO t1 VALUES ( regexp_substr( convert('abcd' using utf16le), 'abc' ), regexp_substr( convert('abcd' using utf16), 'abc' )); INSERT INTO t1 VALUES ( regexp_substr( 'abcd', 'abc' ), regexp_substr( 'abcd', 'abc' )); SELECT * FROM t1; a b abc abc abc abc DROP TABLE t1;