--echo # --echo # Tests of regular expression functions. --echo # --source ../mysql-test/include/icu_utils.inc CREATE TABLE t1 ( subject char(10), pattern char(10) ); --echo # --echo # REGEXP_INSTR --echo # --echo # REGEXP_INSTR(), two arguments. SELECT regexp_instr( 'abc', 'a' ); SELECT regexp_instr( 'abc', 'b' ); SELECT regexp_instr( 'abc', 'c' ); SELECT regexp_instr( 'abc', 'd' ); SELECT regexp_instr( NULL, 'a' ); SELECT regexp_instr( 'a', NULL ); SELECT regexp_instr( NULL, NULL ); --echo # This tests that the correct character set is declared. If we don't --echo # declare it correctly, the UTF-16 sequence will be interpreted as a --echo # two-byte string consisting of the ASCII for '1' and NUL, and the --echo # result will be 3100. SET NAMES binary; SELECT hex( concat(regexp_instr( 'a', 'a' )) ); SET NAMES DEFAULT; --echo # REGEXP_INSTR(), illegal types. SELECT regexp_instr( 1, 'a' ); SELECT regexp_instr( 1.1, 'a' ); SELECT regexp_instr( 'a', 1 ); SELECT regexp_instr( 'a', 1.1 ); SELECT regexp_instr( subject, pattern ) FROM t1; --error ER_REGEXP_ILLEGAL_ARGUMENT SELECT regexp_instr( 'a', '[[:invalid_bracket_expression:]]' ); --echo # REGEXP_INSTR(), three arguments. --echo # subject, pattern, position. SELECT regexp_instr( 'abcabcabc', 'a+', 1 ); SELECT regexp_instr( 'abcabcabc', 'a+', 2 ); SELECT regexp_instr( 'abcabcabc', 'b+', 1 ); SELECT regexp_instr( 'abcabcabc', 'b+', 2 ); SELECT regexp_instr( 'abcabcabc', 'b+', 3 ); --error ER_REGEXP_INDEX_OUTOFBOUNDS_ERROR SELECT regexp_instr( 'a', 'a+', 3 ); --echo # REGEXP_INSTR(), four arguments. --echo # subject, pattern, position, occurence. SELECT regexp_instr( 'abcabcabc', 'a+', 1, 2 ); SELECT regexp_instr( 'abcabcabc', 'a+', 1, 3 ); SELECT regexp_instr( 'abcabcabc', 'a+', 1, 4 ); SELECT regexp_instr( 'abcabcabc', 'a+', 4, 2 ); --echo # REGEXP_INSTR(), five arguments. --echo # subject, pattern, position, occurence, return_option --error ER_WRONG_ARGUMENTS SELECT regexp_instr( 'a', 'a+', 1, 1, 2 ); --error ER_WRONG_ARGUMENTS SELECT regexp_instr( 'a', 'a+', 1, 1, -1 ); SELECT regexp_instr( 'a', 'a+', 1, 1, NULL ); SELECT regexp_instr( 'abcabcabc', 'a+', 1, 1, 0 ); SELECT regexp_instr( 'abcabcabc', 'a+', 1, 1, 1 ); SELECT regexp_instr( 'aaabcabcabc', 'a+', 1, 1, 1 ); --echo # REGEXP_INSTR(), six arguments. --echo # subject, pattern, position, occurence, return_option, match_parameter SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'c' ); SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'i' ); SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'ci' ); SELECT regexp_instr( 'aaabcabcabc', 'A+', 1, 1, 1, 'cic' ); --error ER_WRONG_ARGUMENTS SELECT regexp_instr( 'a', 'a+', 1, 1, 1, 'x' ); SELECT regexp_instr( 'a', 'a+', 1, 1, 1, NULL ); --echo # --echo # REGEXP_LIKE --echo # --echo # REGEXP_LIKE(), two arguments. SELECT regexp_like( 'abc', 'a' ); SELECT regexp_like( 'abc', 'b' ); SELECT regexp_like( 'abc', 'c' ); SELECT regexp_like( 'abc', 'd' ); SELECT regexp_like( 'a', 'a.*' ); SELECT regexp_like( 'ab', 'a.*' ); SELECT regexp_like( NULL, 'a' ); SELECT regexp_like( 'a', NULL ); SELECT regexp_like( NULL, NULL ); --echo # This tests that the correct character set is declared. If we don't --echo # declare it correctly, the UTF-16 sequence will be interpreted as a --echo # two-byte string consisting of the ASCII for '1' and NUL, and the --echo # result will be 3100. SET NAMES binary; SELECT hex( concat(regexp_like( 'a', 'a' )) ); SET NAMES DEFAULT; --echo # REGEXP_LIKE(), three arguments. SELECT regexp_like( 'abc', 'A', 'i' ); SELECT regexp_like( 'abc', 'A', 'c' ); --error ER_WRONG_ARGUMENTS SELECT regexp_like( 'a', 'a+', 'x' ); --error ER_WRONG_ARGUMENTS SELECT regexp_like( 'a', 'a+', 'cmnux' ); SELECT regexp_like( 'a', 'a+', NULL ); --echo # REGEXP_LIKE(), illegal types. SELECT regexp_like( 1, 'a' ); SELECT regexp_like( 1.1, 'a' ); SELECT regexp_like( 'a', 1 ); SELECT regexp_like( 'a', 1.1 ); --error ER_REGEXP_ILLEGAL_ARGUMENT SELECT regexp_like('a', '[[:invalid_bracket_expression:]]'); --echo # --echo # REGEXP_REPLACE --echo # --echo # REGEXP_REPLACE(), three arguments. SELECT regexp_replace( 'aaa', 'a', 'X' ); SELECT regexp_replace( 'abc', 'b', 'X' ); SELECT regexp_replace( NULL, 'a', 'X' ); SELECT regexp_replace( 'aaa', NULL, 'X' ); SELECT regexp_replace( 'aaa', 'a', NULL ); --echo # This tests that the correct character set is declared. If we don't --echo # declare it correctly, the UTF-16 sequence will be interpreted as a --echo # zero-terminated string consisting of 'X', and the --echo # result will thus be 'X'. SELECT concat( regexp_replace( 'aaa', 'a', 'X' ), 'x' ); --echo # REGEXP_REPLACE(), four arguments. --error ER_WRONG_PARAMETERS_TO_NATIVE_FCT SELECT regexp_replace( 'aaa', 'a', 'X', 0 ); SELECT regexp_replace( 'aaa', 'a', 'X', 1 ); --error ER_REGEXP_INDEX_OUTOFBOUNDS_ERROR SELECT regexp_replace( 'a', 'a+', 'b', 3 ); --echo # REGEXP_REPLACE(), five arguments. --error ER_WRONG_PARAMETERS_TO_NATIVE_FCT SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 0, 1 ); SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 1 ); SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'X', 1, 2 ); SELECT regexp_replace( 'aaabbccbbddaa', '(b+)', '<$1>', 1, 2 ); SELECT regexp_replace( 'aaabbccbbddaa', 'x+', 'x', 1, 0 ); SELECT regexp_replace( 'aaabbccbbddaa', 'b+', 'x', 1, 0 ); SELECT regexp_replace( 'aaab', 'b', 'x', 1, 2 ); SELECT regexp_replace( 'aaabccc', 'b', 'x', 1, 2 ); SELECT regexp_replace( 'abc', 'b', 'X' ); SELECT regexp_replace( 'abcbdb', 'b', 'X' ); SELECT regexp_replace( 'abcbdb', 'b', 'X', 3 ); SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 1 ); SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 2 ); SELECT regexp_replace( 'aaabcbdb', 'b', 'X', 3 ); --error ER_REGEXP_ILLEGAL_ARGUMENT SELECT regexp_replace( 'a', '[[:invalid_bracket_expression:]]', '$1' ); --echo # --echo # Test of the dynamic buffer in REGEXP_REPLACE. --echo # SELECT regexp_replace( 'aaa', 'a', 'X', 2 ); SELECT regexp_replace( 'aaa', 'a', 'XX', 2 ); --echo # --echo # REGEXP_SUBSTR --echo # --error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT SELECT regexp_substr( 'a' ); --error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT SELECT regexp_substr( 'a', 'b', 'c', 'd', 'e', 'f' ); --echo # REGEXP_SUBSTR(), two arguments. SELECT regexp_substr( 'ab ac ad', '.d' ); SELECT regexp_substr( 'ab ac ad', '.D' ); --echo # This tests that the correct character set is declared. If we don't --echo # declare it correctly, the UTF-16 sequence will be interpreted as a --echo # zero-terminated string consisting of 'a', and the --echo # result will thus be 'a'. SELECT concat( regexp_substr( 'aaa', 'a+' ), 'x' ); --echo # REGEXP_SUBSTR(), three arguments. --error ER_WRONG_PARAMETERS_TO_NATIVE_FCT SELECT regexp_substr( 'ab ac ad', 'a.', 0 ); SELECT regexp_substr( 'ab ac ad', 'A.', 3 ); SELECT regexp_substr( 'ab ac ad', 'A.', 3, 1 ); SELECT regexp_substr( 'ab ac ad', 'A.', 3, 2 ); SELECT regexp_substr( 'ab ac ad', 'A.', 3, 3 ); SELECT regexp_substr( 'ab ac ad', 'A.', 3, 3 ) IS NULL; --echo # REGEXP_SUBSTR(), four arguments. SELECT regexp_substr( 'ab ac ad', 'A.', 1, 1, 'c' ); SELECT regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'i' ); SELECT regexp_substr( 'ab\nac\nad', 'A.', 1, 1, 'im' ); --error ER_REGEXP_ILLEGAL_ARGUMENT SELECT regexp_substr('a', '[[:invalid_bracket_expression:]]'); SET sql_mode = ''; # Un-strict CREATE TABLE t2 ( g GEOMETRY NOT NULL ); INSERT INTO t2 VALUES ( POINT(1,2) ); SELECT concat( regexp_like(g, g), 'x' ) FROM t2; SET sql_mode = DEFAULT; DROP TABLE t2; --echo # DROP TABLE t1; --echo # --echo # Error handling. --echo # --echo # Error handling: Stack limit. --echo # The following queries are from bug#24449090 --error ER_REGEXP_BAD_INTERVAL SELECT regexp_instr( '', '((((((((){80}){}){11}){11}){11}){80}){11}){4}' ); --echo # Query and result logs turned off from here ... --disable_query_log --disable_result_log if (`SELECT icu_major_version() > 50`) { SET GLOBAL regexp_stack_limit = 239; --error ER_REGEXP_STACK_OVERFLOW SELECT regexp_instr( '', '(((((((){120}){11}){11}){11}){80}){11}){4}' ); SET GLOBAL regexp_stack_limit = 32; --error ER_REGEXP_STACK_OVERFLOW SELECT regexp_instr( '', '((((((){11}){11}){11}){160}){11}){160}' ); --error ER_REGEXP_STACK_OVERFLOW SELECT regexp_instr( '', '(((((){150}){11}){160}){11}){160}' ); --error ER_REGEXP_STACK_OVERFLOW SELECT regexp_instr( '', '((((){255}){255}){255}){255}' ); SET GLOBAL regexp_stack_limit = DEFAULT; SET GLOBAL regexp_time_limit = 1000; --error ER_REGEXP_TIME_OUT SELECT regexp_instr( 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC', '(A+)+B' ); SET GLOBAL regexp_time_limit = DEFAULT; } --enable_result_log --enable_query_log --echo # ... to here. The reason is that we're testing the stack limit feature, --echo # which is not present (or working) in ICU 5.0 or earlier. We don't need --echo # the query or result/error log to test the feature, though. The above --echo # tests still fail unless the proper error is thrown. --echo # --echo # Character set conversion. --echo # SET NAMES latin1; SELECT regexp_instr( _latin1 x'61F662', _latin1 x'F6' ); SELECT regexp_instr( _latin1 x'61F662', _utf8mb4'ö' ); # SUSHI, codepoint U+1F363, in SMP SELECT regexp_instr( concat('a', _utf8mb4 x'F09F8DA3'), _utf8mb4 x'F09F8DA3' ); SELECT regexp_instr( _utf8mb4'aöb', _utf8mb4'ö' ); SET NAMES utf8; SELECT regexp_instr( 'aöb', 'ö' ); SET NAMES DEFAULT; #--echo # #--echo # Case sensitivity. #--echo # #SELECT 'a' REGEXP 'A' COLLATE latin1_general_ci; #CREATE TABLE t1 ( a char(6), b char(6) ); #INSERT INTO t1 VALUES ('',''), ('','First'), ('Random','Random'); #SELECT * FROM t1 WHERE CONCAT ( a, ' ', b ) REGEXP 'First.*'; #DROP TABLE t1; #set collation_connection=utf8mb4_general_ci; #--source include/ctype_regex.inc --echo # --echo # Bug#30241: Regular expression problems --echo # SET NAMES utf8; SELECT regexp_instr( 'אב רק', /*k*/'^[^ב]' ); PREPARE stmt1 FROM "select 'a' rlike ?"; DEALLOCATE PREPARE stmt1; CREATE TABLE t1( a INT, subject CHAR(10) ); CREATE TABLE t2( pattern CHAR(10) ); insert into t1 values (0, 'apa'); insert into t2 values ('apa'); DELIMITER ||; CREATE DEFINER=root@localhost PROCEDURE p1() BEGIN UPDATE t1, t2 SET a = 1 WHERE regexp_like(t1.subject, t2.pattern); END|| DELIMITER ;|| CALL p1(); DROP PROCEDURE p1; DROP TABLE t1, t2; CREATE TABLE t1 ( a INT ); EXPLAIN SELECT 1 FROM t1 WHERE 1 REGEXP (1 IN (SELECT 1 FROM t1)); DROP TABLE t1; PREPARE stmt1 FROM "SELECT regexp_like( 'a', ? )"; PREPARE stmt2 FROM "SELECT regexp_like( ?, 'a' )"; PREPARE stmt3 FROM "SELECT regexp_like( ?, ? )"; SET @subject = 'a'; SET @pattern = 'a+'; EXECUTE stmt1 USING @pattern; EXECUTE stmt2 USING @subject; EXECUTE stmt3 USING @subject, @pattern; --echo # --echo # Yes, you can circumvent the type checking using prepared statements. --echo # SET @subject = 1; SET @pattern = 1; EXECUTE stmt1 USING @pattern; EXECUTE stmt2 USING @subject; EXECUTE stmt3 USING @subject, @pattern; DEALLOCATE PREPARE stmt1; DEALLOCATE PREPARE stmt2; DEALLOCATE PREPARE stmt3; --echo # Let's make sure we handle arguments that raise errors when evaluated. --error ER_WRONG_ARGUMENTS DO 1 rlike multilinestring(point(1, 1)); --echo # --echo # Test of repeated use of one matcher. --echo # CREATE TABLE t1 ( a CHAR(10) ); INSERT INTO t1 VALUES ( 'abc' ), ( 'bcd' ), ( 'cde' ); SELECT regexp_like( a, 'a' ) FROM t1; DROP TABLE t1; CREATE TABLE t1 ( a CHAR ( 10 ), b CHAR ( 10 ) ); INSERT INTO t1 VALUES( NULL, 'abc' ); INSERT INTO t1 VALUES( 'def', NULL ); SELECT a, b, regexp_like( a, b ) FROM t1; DROP TABLE t1; --echo # --echo # Generated columns. --echo # CREATE TABLE t1 ( c CHAR(10) CHARSET latin1 COLLATE latin1_bin, c_ci CHAR(10) CHARSET latin1 COLLATE latin1_general_ci, c_cs CHAR(10) CHARSET latin1 COLLATE latin1_general_cs ); INSERT INTO t1 VALUES ( 'a', 'a', 'a' ), ( 'A', 'A', 'A' ), ( 'b', 'b', 'b' ); --sorted_result SELECT c, c_ci REGEXP 'A', c_cs REGEXP 'A' FROM t1; DROP TABLE t1; SELECT regexp_like( _utf8mb4 'ss' COLLATE utf8mb4_german2_ci, _utf8mb4 'ß' COLLATE utf8mb4_german2_ci ); SELECT regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_german2_ci, _utf8mb4 'ss' ); SELECT regexp_like( _utf8mb4 'ß' COLLATE utf8mb4_de_pb_0900_as_cs, _utf8mb4 'ss' ); --echo # --echo # Regression testing. --echo # SET NAMES latin1; SELECT regexp_like( 'a', 'A' COLLATE latin1_general_ci ); SELECT 'a' REGEXP 'A' COLLATE latin1_general_ci; SELECT regexp_like( 'a', 'A' COLLATE latin1_general_cs ); SELECT 'a' REGEXP 'A' COLLATE latin1_general_cs; --echo # The default behaviour, multiline match is off: SELECT regexp_like( 'a\nb\nc', '^b$' ); --echo # Enabling the multiline option using the PCRE option syntax: --echo # (Previously not accepted) SELECT regexp_like( 'a\nb\nc', '(?m)^b$' ); --echo # Dot matches newline: SELECT regexp_like( 'a\nb\nc', '.*' ); SELECT regexp_like( _utf16 'a' , 'a' ); SELECT regexp_like( _utf16le 'a' , 'a' ); --echo # Make sure we exit on error. --error ER_WRONG_ARGUMENTS SELECT regexp_like( 'aaa', 'a+', 1 ); --error ER_WRONG_ARGUMENTS SELECT regexp_substr( 'aaa', 'a+', 1, 1, 1 ); --error ER_WRONG_ARGUMENTS SELECT regexp_substr( 'aaa', 'a+', 1, 1, 1 ); --error ER_REGEXP_RULE_SYNTAX SELECT regexp_substr( 'aaa', '+' ); --echo # --echo # Test of a table where the columns are already in the lib's charset and --echo # contain NULL. --echo # CREATE TABLE t1 ( a CHAR(3) CHARACTER SET utf16le, b CHAR(3) CHARACTER SET utf16le ); INSERT INTO t1 VALUES ( NULL, 'abc' ); INSERT INTO t1 VALUES ( 'def', NULL ); INSERT INTO t1 VALUES ( NULL, NULL ); SELECT a regexp b FROM t1; DROP TABLE t1; --echo # --echo # Typecasts. --echo # CREATE TABLE t1 ( a REAL, b INT, c CHAR(100), d DECIMAL ); INSERT INTO t1 VALUES ( regexp_instr('a', 'a'), regexp_instr('a', 'a'), regexp_instr('a', 'a'), regexp_instr('a', 'a') ); SELECT * FROM t1; DELETE FROM t1; INSERT INTO t1 VALUES ( regexp_like('a', 'a'), regexp_like('a', 'a'), regexp_like('a', 'a'), regexp_like('a', 'a') ); SELECT * FROM t1; DELETE FROM t1; --error 1265 INSERT INTO t1 ( a ) VALUES ( regexp_replace('a', 'a', 'a') ); --error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD INSERT INTO t1 ( b ) VALUES ( regexp_replace('a', 'a', 'a') ); INSERT INTO t1 ( c ) VALUES ( regexp_replace('a', 'a', 'a') ); --error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD INSERT INTO t1 ( d ) VALUES ( regexp_replace('a', 'a', 'a') ); SELECT * FROM t1; DELETE FROM t1; --error 1265 INSERT INTO t1 ( a ) VALUES ( regexp_substr('a', 'a') ); --error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD INSERT INTO t1 ( b ) VALUES ( regexp_substr('a', 'a') ); INSERT INTO t1 ( c ) VALUES ( regexp_substr('a', 'a') ); --error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD INSERT INTO t1 ( d ) VALUES ( regexp_substr('a', 'a') ); SELECT * FROM t1; DROP TABLE t1; --echo # At the time of writing, val_int() is not called when inserting into an --echo # INT column. SELECT cast( regexp_replace('a', 'a', 'a') AS SIGNED INTEGER ); SELECT cast( regexp_substr ('a', 'a') AS SIGNED INTEGER ); --echo # Casting to DATETIME/TIME --echo # Due to the class hierarchy of function objects, these have to be --echo # copy-pasted. SELECT cast( regexp_instr ('a', 'a' ) AS DATETIME ); SELECT cast( regexp_like ('a', 'a' ) AS DATETIME ); SELECT cast( regexp_replace('a', 'a', 'a') AS DATETIME ); SELECT cast( regexp_substr ('a', 'a' ) AS DATETIME ); SELECT cast( regexp_instr ('a', 'a' ) AS TIME ); SELECT cast( regexp_like ('a', 'a' ) AS TIME ); SELECT cast( regexp_replace('a', 'a', 'a') AS TIME ); SELECT cast( regexp_substr ('a', 'a' ) AS TIME ); --echo # --echo # Error messages. --echo # Test of the error messages themselves. --echo # SET GLOBAL net_buffer_length = 1024; SET GLOBAL max_allowed_packet = @@global.net_buffer_length; SELECT @@global.max_allowed_packet; --echo # We need to start a new session in order for the changes to the session --echo # version of max_allowed_packet to take effect. --connect(conn1,localhost,root,,) --echo # This is now the replacement buffer size in UTF-16 characters. SET @buf_sz_utf16 = @@global.max_allowed_packet / length( _utf16'x' ); SELECT @buf_sz_utf16; SELECT length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'b' )); --error ER_REGEXP_BUFFER_OVERFLOW SELECT length(regexp_replace( repeat('a', @buf_sz_utf16 + 1), 'a', 'b' )); --error ER_REGEXP_BUFFER_OVERFLOW SELECT length(regexp_replace( repeat('a', @buf_sz_utf16), 'a', 'bb' )); --disconnect conn1 --connection default SET GLOBAL net_buffer_length = DEFAULT; SET GLOBAL max_allowed_packet = DEFAULT; --error ER_REGEXP_ILLEGAL_ARGUMENT SELECT regexp_like( 'a', '[[:<:]]a' ); --error ER_REGEXP_RULE_SYNTAX SELECT regexp_like( 'a', ' **' ); --error ER_REGEXP_RULE_SYNTAX SELECT regexp_like( 'a', ' \n **' ); --error ER_REGEXP_RULE_SYNTAX SELECT regexp_like( 'a', ' +++' ); --error ER_REGEXP_BAD_ESCAPE_SEQUENCE SELECT regexp_like( 'a', '\\' ); --error ER_REGEXP_UNIMPLEMENTED SELECT regexp_like('a','(?{})'); --error ER_REGEXP_MISMATCHED_PAREN SELECT regexp_like('a','('); --error ER_REGEXP_BAD_INTERVAL SELECT regexp_like('a','a{}'); --error ER_REGEXP_MAX_LT_MIN SELECT regexp_like('a','a{2,1}'); --error ER_REGEXP_INVALID_BACK_REF SELECT regexp_like('a','\\1'); --error ER_REGEXP_LOOK_BEHIND_LIMIT SELECT regexp_substr( 'ab','(?<=a+)b' ); --error ER_REGEXP_MISSING_CLOSE_BRACKET SELECT regexp_like( 'a', 'a[' ); --error ER_REGEXP_INVALID_RANGE SELECT regexp_substr( 'ab','[b-a]' ); --echo # --echo # Test that the replacement buffer can grow beyond the maximum VARCHAR --echo # column length. --echo # CREATE TABLE t1 ( a TEXT ); # The maximum VARCHAR length, divided by max multibyte character length, # plus one. INSERT INTO t1 VALUES ( repeat( 'a', 16384 ) ); SELECT char_length ( regexp_replace( a, 'a', 'b' ) ) FROM t1; SET GLOBAL regexp_time_limit = 10000; SELECT regexp_like ( regexp_replace( a, 'a', 'b' ), 'b{16384}' ) FROM t1; SET GLOBAL regexp_time_limit = DEFAULT; DROP TABLE t1; --echo # --echo # Bug#27134570: DOS: REGEXP TAKES EXPONENTIALLY LONGER, CAN'T BE KILLED, --echo # HOGS CPU --echo # --error ER_REGEXP_PATTERN_TOO_BIG DO '1' regexp repeat('$', 50000000); --echo # --echo # Bug#27140286: REGEXP, ASSERTION FAILED: !THD->IS_ERROR() IN --echo # SETUP_FIELDS() --echo # --error ER_DATA_OUT_OF_RANGE DO ( (@b) regexp (cot (unhex ( 1 )) ) ); DO ( (@c) rlike (cot ( (!( @f )) )) ); --error ER_DATA_OUT_OF_RANGE DO ( ('') rlike (cot ( ' %' )) ); --error ER_WRONG_ARGUMENTS DO ( (-28277) regexp (period_add ( -10966, 1381205734 )) ); --error ER_INVALID_JSON_TEXT_IN_PARAM DO ( (( @f )) rlike (json_depth ( 'key4' )) ); --error ER_DATA_OUT_OF_RANGE DO ( ('- ') regexp (cot ( right (':#.', 33) )) ); --error ER_DATA_OUT_OF_RANGE DO ( (1) regexp (exp ( 64826 )) ); DO ( (@g) regexp (cot ( @f )) ); --error ER_DATA_OUT_OF_RANGE DO ( (@b) regexp (exp ( 0x1fc5574c )) ); --error ER_DATA_OUT_OF_RANGE DO ( (25091) rlike (exp ( 14373 )) ); --echo # --echo # Bug#27183583: REGEXP OPERATIONS CANT BE KILLED --echo # SET GLOBAL regexp_time_limit = 1000000; enable_connect_log; connect(conn1, localhost, root); let $conn1_id= `SELECT CONNECTION_ID()`; send SELECT regexp_instr('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC', '(A+)+B'); connection default; # Wait until the query is actually executing before we try to kill it --let $wait_condition= SELECT count(*)=1 FROM performance_schema.threads WHERE processlist_info LIKE "SELECT regexp_instr%" AND processlist_state = "executing"; --source include/wait_condition.inc replace_result $conn1_id ; eval KILL QUERY $conn1_id; connection conn1; --error ER_QUERY_INTERRUPTED reap; disconnect conn1; source include/wait_until_disconnected.inc; connection default; disable_connect_log; SET GLOBAL regexp_time_limit = DEFAULT; --echo # --echo # Bug#27252630: REGEXP FUNCTIONS IGNORE NULLS --echo # SELECT regexp_instr ( 'a', 'a', NULL ); SELECT regexp_instr ( 'a', 'a', 1, NULL ); SELECT regexp_instr ( 'a', 'a', 1, 0, NULL ); SELECT regexp_instr ( 'a', 'a', 1, 0, 0, NULL ); SELECT regexp_like ( 'a', 'a', NULL ); SELECT regexp_replace ( 'a', 'a', 'a', NULL ); SELECT regexp_replace ( 'a', 'a', 'a', 1, NULL ); SELECT regexp_replace ( 'a', 'a', 'a', 1, 0, NULL ); SELECT regexp_substr ( 'a', 'a', NULL ); SELECT regexp_substr ( 'a', 'a', 1, NULL ); SELECT regexp_substr ( 'a', 'a', 1, 0, NULL ); --echo # --echo # Bug#27232647: REGEX: ASSERTION FAILED: !STR || STR != M_PTR --echo # SELECT regexp_like( reverse(''), 123 ); SELECT regexp_like( soundex(@v1), 'abc' ); SELECT regexp_like( left('', ''), 'abc' ); SELECT regexp_like( repeat(@v1, 'abc'), 'abc' ); --echo # --echo # Bug#27597980: ADD ERROR MESSAGE FOR INVALID CAPTURE GROUP NAME --echo # --error ER_REGEXP_INVALID_CAPTURE_GROUP_NAME SELECT regexp_replace( 'abc' , 'abc', '$abc' ); --echo # --echo # Bug#27595368: ASSERTION FAILED: 0 IN ITEM_PARAM::VAL_REAL WITH --echo # PREPARED STATEMENT --echo # SET @s := "SELECT regexp_like( '', '', ? / '' )"; PREPARE stmt FROM @s; --error ER_WRONG_ARGUMENTS EXECUTE stmt; --error ER_WRONG_ARGUMENTS EXECUTE stmt; CREATE TABLE t1 ( match_parameter CHAR(1) ); INSERT INTO t1 VALUES ( 'i' ), ( 'c' ), ( 'i' ), ( 'c' ); SELECT match_parameter, regexp_like ( 'a', 'A', match_parameter ) FROM t1; DROP TABLE t1; --echo # --echo # Bug#27612255: VALGRIND WARNING ON INVALID CAPTURE GROUP --echo # --error ER_REGEXP_INVALID_CAPTURE_GROUP_NAME SELECT regexp_replace( ' F' , '^ ', '[,$' ); --echo # --echo # Bug#27751277: ASSERTION FALSE IN REGEXP::CHECK_ICU_STATUS --echo # --error ER_REGEXP_INVALID_FLAG SELECT regexp_instr( 'abc', '(?-' ); --echo # --echo # Bug#27743722 REGEXP::EVALEXPRTOCHARSET: --echo # REFERENCE BINDING TO MISALIGNED ADDRESS --echo # select regexp_instr(char('313:50:35.199734'using utf16le),uuid()); --echo # --echo # Bug#27992118: REGEXP_REPLACE ACCUMULATING RESULT --echo # CREATE TABLE t1 ( a VARCHAR(10) ); INSERT INTO t1 VALUES ('a a a'), ('b b b'), ('c c c'); SELECT regexp_replace(a, '^([[:alpha:]]+)[[:space:]].*$', '$1') FROM t1; DROP TABLE t1; --echo # --echo # Bug#28027093: REGEXP_REPLACE TRUNCATE UPDATE --echo # CREATE TABLE t1 ( a CHAR(3) ); INSERT INTO t1 VALUES ( regexp_replace ('a', 'a', 'x') ); SELECT * FROM t1; UPDATE t1 SET a = regexp_replace ( 'b', 'b', 'y' ); SELECT * FROM t1; DROP TABLE t1; CREATE TABLE t1 ( a CHAR(3) ); INSERT INTO t1 VALUES ( regexp_substr ('a', 'a', 1) ); SELECT * FROM t1; UPDATE t1 SET a = regexp_substr ('b', 'b', 1); SELECT * FROM t1; DROP TABLE t1; --echo # --echo # Bug#27682225: WRONG METADATA FOR REGEXP FUNCTIONS --echo # CREATE TABLE t1 AS SELECT regexp_instr( 'a', 'a' ) AS a, regexp_like( 'a', 'a' ) AS b, regexp_replace( 'abc', 'b', 'x' ) AS c, regexp_substr( 'a', 'a' ) AS d, regexp_substr( repeat('a', 512), 'a' ) AS e, regexp_substr( repeat('a', 513), 'a' ) AS f; SHOW CREATE TABLE t1; SELECT * FROM t1; DROP TABLE t1; --echo # --echo # Bug#29231490: REGEXP FUNCTIONS FAIL TO USE CODEPOINT POSITIONS --echo # --echo # Sushi emoji, may not render in your editor. SET NAMES DEFAULT; SELECT regexp_instr( '🍣🍣a', '🍣', 2 ); SELECT regexp_instr( '🍣🍣a', 'a', 3 ); --error ER_REGEXP_INDEX_OUTOFBOUNDS_ERROR SELECT regexp_instr( '🍣🍣a', 'a', 4 ); SELECT regexp_substr( 'a🍣b', '.', 1 ); SELECT regexp_substr( 'a🍣b', '.', 2 ); SELECT regexp_substr( 'a🍣b', '.', 3 ); SELECT regexp_substr( 'a🍣b', '.', 4 ); SELECT regexp_substr( 'a🍣🍣b', '.', 1 ); SELECT regexp_substr( 'a🍣🍣b', '.', 2 ); SELECT regexp_substr( 'a🍣🍣b', '.', 3 ); SELECT regexp_substr( 'a🍣🍣b', '.', 4 ); SELECT regexp_substr( 'a🍣🍣b', '.', 5 ); SELECT regexp_replace( '🍣🍣🍣', '.', 'a', 2 ); SELECT regexp_replace( '🍣🍣🍣', '.', 'a', 2, 2 ); --source ../mysql-test/include/cleanup_icu_utils.inc --echo # --echo # REGEXP_REPLACE DOES NOT CONVERT RESULT CHARACTER SET --echo # SELECT hex(regexp_replace( convert( 'abcd' using utf8mb4 ), 'c', '')); SELECT hex(regexp_replace( convert( 'abcd' using utf16 ), 'c', '')); SELECT hex(regexp_substr( convert( 'abcd' using utf8mb4 ), 'abc')); SELECT hex(regexp_substr( convert( 'abcd' using utf16 ), 'abc')); --echo # --echo # Test of the code path that elides character set conversion when the --echo # target column has the same character set as ICU produces. This depends --echo # on the architecture, and so we try both big and little endian. --echo # CREATE TABLE t1 ( a CHAR(10) CHARACTER SET utf16le, b CHAR(10) CHARACTER SET utf16 ); INSERT INTO t1 VALUES ( regexp_substr( convert('abcd' using utf16le), 'abc' ), regexp_substr( convert('abcd' using utf16), 'abc' )); INSERT INTO t1 VALUES ( regexp_substr( 'abcd', 'abc' ), regexp_substr( 'abcd', 'abc' )); SELECT * FROM t1; DROP TABLE t1;