4977 lines
152 KiB
C++
4977 lines
152 KiB
C++
/*****************************************************************************
|
|
|
|
Copyright (c) 2000, 2019, Oracle and/or its affiliates. All Rights Reserved.
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License, version 2.0, as published by the
|
|
Free Software Foundation.
|
|
|
|
This program is also distributed with certain software (including but not
|
|
limited to OpenSSL) that is licensed under separate terms, as designated in a
|
|
particular file or component or in included license documentation. The authors
|
|
of MySQL hereby grant you an additional permission to link the program and
|
|
your derivative works with the separately licensed software that they have
|
|
included with MySQL.
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*****************************************************************************/
|
|
|
|
/** @file row/row0mysql.cc
|
|
Interface between Innobase row operations and MySQL.
|
|
Contains also create table and other data dictionary operations.
|
|
|
|
Created 9/17/2000 Heikki Tuuri
|
|
*******************************************************/
|
|
|
|
#include <debug_sync.h>
|
|
#include <gstream.h>
|
|
#include <spatial.h>
|
|
#include <sql_const.h>
|
|
#include <sys/types.h>
|
|
#include <algorithm>
|
|
#include <deque>
|
|
#include <new>
|
|
#include <vector>
|
|
|
|
#include "btr0sea.h"
|
|
#include "dict0boot.h"
|
|
#include "dict0crea.h"
|
|
#include "dict0dd.h"
|
|
#include "dict0dict.h"
|
|
#include "dict0load.h"
|
|
#include "dict0priv.h"
|
|
#include "dict0stats.h"
|
|
#include "dict0stats_bg.h"
|
|
#include "fil0fil.h"
|
|
#include "fsp0file.h"
|
|
#include "fsp0sysspace.h"
|
|
#include "fts0fts.h"
|
|
#include "fts0types.h"
|
|
#include "ha_prototypes.h"
|
|
#include "ibuf0ibuf.h"
|
|
#include "lock0lock.h"
|
|
#include "log0log.h"
|
|
#include "pars0pars.h"
|
|
#include "que0que.h"
|
|
#include "rem0cmp.h"
|
|
#include "row0ext.h"
|
|
#include "row0import.h"
|
|
#include "row0ins.h"
|
|
#include "row0merge.h"
|
|
#include "row0mysql.h"
|
|
#include "row0pread.h"
|
|
#include "row0row.h"
|
|
#include "row0sel.h"
|
|
#include "row0upd.h"
|
|
#include "trx0purge.h"
|
|
#include "trx0rec.h"
|
|
#include "trx0roll.h"
|
|
#include "trx0undo.h"
|
|
#include "ut0mpmcbq.h"
|
|
#include "ut0new.h"
|
|
|
|
#include "current_thd.h"
|
|
#include "my_dbug.h"
|
|
#include "my_io.h"
|
|
|
|
#include "lizard0dict.h"
|
|
#include "lizard0gp.h"
|
|
|
|
/** Message text stating that user modifications of the database are not
allowed while innodb_force_recovery is on, and that the setting has to be
reset to 0 in my.cnf after shutting down mysqld.
NOTE(review): the users of this string are outside the visible part of the
file. */
static const char *MODIFICATIONS_NOT_ALLOWED_MSG_FORCE_RECOVERY =
|
|
"innodb_force_recovery is on. We do not allow database modifications"
|
|
" by the user. Shut down mysqld and edit my.cnf to set"
|
|
" innodb_force_recovery=0";
|
|
|
|
/** Provide optional 4.x backwards compatibility for 5.0 and above.
When this flag is set, row_mysql_handle_errors() reacts to
DB_LOCK_WAIT_TIMEOUT and DB_GP_WAIT_TIMEOUT by calling
trx_rollback_to_savepoint(trx, NULL) instead of rolling back only to the
savepoint supplied by the caller. Defaults to FALSE. */
|
|
ibool row_rollback_on_timeout = FALSE;
|
|
|
|
/** Chain node of the list of tables to drop in the background.
Nodes of this type are the elements of the file-static list
row_mysql_drop_list. */
|
|
struct row_mysql_drop_t {
|
|
char *table_name; /*!< table name */
|
|
UT_LIST_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
|
|
/*!< list chain node */
|
|
};
|
|
|
|
/** @brief List of tables we should drop in background.

ALTER TABLE in MySQL requires that the table handler can drop the
table in background when there are no queries to it any
more. Protected by row_drop_list_mutex. */
|
|
static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
|
|
|
|
/** Mutex protecting the background table drop list
(row_mysql_drop_list). */
|
|
static ib_mutex_t row_drop_list_mutex;
|
|
|
|
/** Flag: has row_mysql_drop_list been initialized? Starts out FALSE. */
|
|
static ibool row_mysql_drop_list_inited = FALSE;
|
|
|
|
/** If a table is not yet in the drop list, adds the table to the list of tables
which the master thread drops in background. We need this on Unix because in
ALTER TABLE MySQL may call drop table even if the table has running queries on
it. Also, if there are running foreign key checks on the table, we drop the
table lazily.
This is only the forward declaration; the definition is not part of the
visible portion of the file.
@return true if the table was not yet in the drop list, and was added there */
|
|
static ibool row_add_table_to_background_drop_list(
|
|
const char *name); /*!< in: table name */
|
|
|
|
#ifdef UNIV_DEBUG
|
|
/** Wait for the background drop list to become empty. */
|
|
void row_wait_for_background_drop_list_empty() {
|
|
bool empty = false;
|
|
while (!empty) {
|
|
mutex_enter(&row_drop_list_mutex);
|
|
empty = (UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
|
|
mutex_exit(&row_drop_list_mutex);
|
|
os_thread_sleep(100000);
|
|
}
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
/** Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
|
|
static void row_mysql_delay_if_needed(void) {
|
|
if (srv_dml_needed_delay) {
|
|
os_thread_sleep(srv_dml_needed_delay);
|
|
}
|
|
}
|
|
|
|
/** Frees the blob heap of a prebuilt struct and resets the pointer.
The caller must make sure prebuilt->blob_heap is set: it is passed to
mem_heap_free() unconditionally.
@param[in,out]	prebuilt	prebuilt struct whose blob_heap is freed and
                                then set to nullptr */
void row_mysql_prebuilt_free_blob_heap(row_prebuilt_t *prebuilt) {
  DBUG_TRACE;

  mem_heap_t *heap_to_free = prebuilt->blob_heap;

  DBUG_PRINT("row_mysql_prebuilt_free_blob_heap",
             ("blob_heap freeing: %p", heap_to_free));

  mem_heap_free(heap_to_free);
  prebuilt->blob_heap = nullptr;
}
|
|
|
|
/** Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
|
|
format.
|
|
@return pointer to the data, we skip the 1 or 2 bytes at the start
|
|
that are used to store the len */
|
|
byte *row_mysql_store_true_var_len(
|
|
byte *dest, /*!< in: where to store */
|
|
ulint len, /*!< in: length, must fit in two bytes */
|
|
ulint lenlen) /*!< in: storage length of len: either 1 or 2 bytes */
|
|
{
|
|
if (lenlen == 2) {
|
|
ut_a(len < 256 * 256);
|
|
|
|
mach_write_to_2_little_endian(dest, len);
|
|
|
|
return (dest + 2);
|
|
}
|
|
|
|
ut_a(lenlen == 1);
|
|
ut_a(len < 256);
|
|
|
|
mach_write_to_1(dest, len);
|
|
|
|
return (dest + 1);
|
|
}
|
|
|
|
/** Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
|
|
returns a pointer to the data.
|
|
@return pointer to the data, we skip the 1 or 2 bytes at the start
|
|
that are used to store the len */
|
|
const byte *row_mysql_read_true_varchar(
|
|
ulint *len, /*!< out: variable-length field length */
|
|
const byte *field, /*!< in: field in the MySQL format */
|
|
ulint lenlen) /*!< in: storage length of len: either 1
|
|
or 2 bytes */
|
|
{
|
|
if (lenlen == 2) {
|
|
*len = mach_read_from_2_little_endian(field);
|
|
|
|
return (field + 2);
|
|
}
|
|
|
|
ut_a(lenlen == 1);
|
|
|
|
*len = mach_read_from_1(field);
|
|
|
|
return (field + 1);
|
|
}
|
|
|
|
/** Stores a reference to a BLOB in the MySQL format. */
|
|
void row_mysql_store_blob_ref(
|
|
byte *dest, /*!< in: where to store */
|
|
ulint col_len, /*!< in: dest buffer size: determines into
|
|
how many bytes the BLOB length is stored,
|
|
the space for the length may vary from 1
|
|
to 4 bytes */
|
|
const void *data, /*!< in: BLOB data; if the value to store
|
|
is SQL NULL this should be NULL pointer */
|
|
ulint len) /*!< in: BLOB length; if the value to store
|
|
is SQL NULL this should be 0; remember
|
|
also to set the NULL bit in the MySQL record
|
|
header! */
|
|
{
|
|
/* MySQL might assume the field is set to zero except the length and
|
|
the pointer fields */
|
|
|
|
memset(dest, '\0', col_len);
|
|
|
|
/* In dest there are 1 - 4 bytes reserved for the BLOB length,
|
|
and after that 8 bytes reserved for the pointer to the data.
|
|
In 32-bit architectures we only use the first 4 bytes of the pointer
|
|
slot. */
|
|
|
|
ut_a(col_len - 8 > 1 || len < 256);
|
|
ut_a(col_len - 8 > 2 || len < 256 * 256);
|
|
ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
|
|
|
|
mach_write_to_n_little_endian(dest, col_len - 8, len);
|
|
|
|
memcpy(dest + col_len - 8, &data, sizeof data);
|
|
}
|
|
|
|
/** Reads a reference to a BLOB in the MySQL format.
The first (col_len - 8) bytes of ref hold the BLOB length in little-endian
order; the pointer to the data is copied out of the bytes that follow.
@param[out]	len	BLOB length
@param[in]	ref	BLOB reference in the MySQL format
@param[in]	col_len	size of the reference: length bytes plus the 8-byte
                        pointer slot
@return pointer to the BLOB data */
const byte *row_mysql_read_blob_ref(ulint *len, const byte *ref,
                                    ulint col_len) {
  const ulint len_bytes = col_len - 8;
  byte *blob;

  *len = mach_read_from_n_little_endian(ref, len_bytes);

  memcpy(&blob, ref + len_bytes, sizeof blob);

  return (blob);
}
|
|
|
|
/** Converting InnoDB geometry data format to MySQL data format. */
|
|
void row_mysql_store_geometry(
|
|
byte *dest, /*!< in/out: where to store */
|
|
ulint dest_len, /*!< in: dest buffer size: determines
|
|
into how many bytes the GEOMETRY length
|
|
is stored, the space for the length
|
|
may vary from 1 to 4 bytes */
|
|
const byte *src, /*!< in: GEOMETRY data; if the value to
|
|
store is SQL NULL this should be NULL
|
|
pointer */
|
|
ulint src_len) /*!< in: GEOMETRY length; if the value
|
|
to store is SQL NULL this should be 0;
|
|
remember also to set the NULL bit in
|
|
the MySQL record header! */
|
|
{
|
|
/* MySQL might assume the field is set to zero except the length and
|
|
the pointer fields */
|
|
UNIV_MEM_ASSERT_RW(src, src_len);
|
|
UNIV_MEM_ASSERT_W(dest, dest_len);
|
|
UNIV_MEM_INVALID(dest, dest_len);
|
|
|
|
memset(dest, '\0', dest_len);
|
|
|
|
/* In dest there are 1 - 4 bytes reserved for the BLOB length,
|
|
and after that 8 bytes reserved for the pointer to the data.
|
|
In 32-bit architectures we only use the first 4 bytes of the pointer
|
|
slot. */
|
|
|
|
ut_ad(dest_len - 8 > 1 || src_len < 1 << 8);
|
|
ut_ad(dest_len - 8 > 2 || src_len < 1 << 16);
|
|
ut_ad(dest_len - 8 > 3 || src_len < 1 << 24);
|
|
|
|
mach_write_to_n_little_endian(dest, dest_len - 8, src_len);
|
|
|
|
memcpy(dest + dest_len - 8, &src, sizeof src);
|
|
|
|
DBUG_EXECUTE_IF("row_print_geometry_data", {
|
|
String res;
|
|
Geometry_buffer buffer;
|
|
String wkt;
|
|
|
|
/** Show the meaning of geometry data. */
|
|
Geometry *g =
|
|
Geometry::construct(&buffer, (const char *)src, (uint32)src_len);
|
|
|
|
if (g) {
|
|
if (g->as_wkt(&wkt) == 0) {
|
|
ib::info(ER_IB_MSG_970) << "Write geometry data to"
|
|
" MySQL WKT format: "
|
|
<< wkt.c_ptr_safe() << ".";
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
/** Read geometry data in the MySQL format.
|
|
@return pointer to geometry data */
|
|
static const byte *row_mysql_read_geometry(
|
|
ulint *len, /*!< out: data length */
|
|
const byte *ref, /*!< in: geometry data in the
|
|
MySQL format */
|
|
ulint col_len) /*!< in: MySQL format length */
|
|
{
|
|
byte *data;
|
|
|
|
*len = mach_read_from_n_little_endian(ref, col_len - 8);
|
|
|
|
memcpy(&data, ref + col_len - 8, sizeof data);
|
|
|
|
DBUG_EXECUTE_IF("row_print_geometry_data", {
|
|
String res;
|
|
Geometry_buffer buffer;
|
|
String wkt;
|
|
|
|
/** Show the meaning of geometry data. */
|
|
Geometry *g =
|
|
Geometry::construct(&buffer, (const char *)data, (uint32)*len);
|
|
|
|
if (g) {
|
|
if (g->as_wkt(&wkt) == 0) {
|
|
ib::info(ER_IB_MSG_971) << "Read geometry data in"
|
|
" MySQL's WKT format: "
|
|
<< wkt.c_ptr_safe() << ".";
|
|
}
|
|
}
|
|
});
|
|
|
|
return (data);
|
|
}
|
|
|
|
/** Pad a column with spaces. */
|
|
void row_mysql_pad_col(ulint mbminlen, /*!< in: minimum size of a character,
|
|
in bytes */
|
|
byte *pad, /*!< out: padded buffer */
|
|
ulint len) /*!< in: number of bytes to pad */
|
|
{
|
|
const byte *pad_end;
|
|
|
|
switch (UNIV_EXPECT(mbminlen, 1)) {
|
|
default:
|
|
ut_error;
|
|
case 1:
|
|
/* space=0x20 */
|
|
memset(pad, 0x20, len);
|
|
break;
|
|
case 2:
|
|
/* space=0x0020 */
|
|
pad_end = pad + len;
|
|
ut_a(!(len % 2));
|
|
while (pad < pad_end) {
|
|
*pad++ = 0x00;
|
|
*pad++ = 0x20;
|
|
};
|
|
break;
|
|
case 4:
|
|
/* space=0x00000020 */
|
|
pad_end = pad + len;
|
|
ut_a(!(len % 4));
|
|
while (pad < pad_end) {
|
|
*pad++ = 0x00;
|
|
*pad++ = 0x00;
|
|
*pad++ = 0x00;
|
|
*pad++ = 0x20;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
/** Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
|
|
The counterpart of this function is row_sel_field_store_in_mysql_format() in
|
|
row0sel.cc.
|
|
@return up to which byte we used buf in the conversion */
|
|
byte *row_mysql_store_col_in_innobase_format(
|
|
dfield_t *dfield, /*!< in/out: dfield where dtype
|
|
information must be already set when
|
|
this function is called! */
|
|
byte *buf, /*!< in/out: buffer for a converted
|
|
integer value; this must be at least
|
|
col_len long then! NOTE that dfield
|
|
may also get a pointer to 'buf',
|
|
therefore do not discard this as long
|
|
as dfield is used! */
|
|
ibool row_format_col, /*!< TRUE if the mysql_data is from
|
|
a MySQL row, FALSE if from a MySQL
|
|
key value;
|
|
in MySQL, a true VARCHAR storage
|
|
format differs in a row and in a
|
|
key value: in a key value the length
|
|
is always stored in 2 bytes! */
|
|
const byte *mysql_data, /*!< in: MySQL column value, not
|
|
SQL NULL; NOTE that dfield may also
|
|
get a pointer to mysql_data,
|
|
therefore do not discard this as long
|
|
as dfield is used! */
|
|
ulint col_len, /*!< in: MySQL column length; NOTE that
|
|
this is the storage length of the
|
|
column in the MySQL format row, not
|
|
necessarily the length of the actual
|
|
payload data; if the column is a true
|
|
VARCHAR then this is irrelevant */
|
|
ulint comp) /*!< in: nonzero=compact format */
|
|
{
|
|
const byte *ptr = mysql_data;
|
|
const dtype_t *dtype;
|
|
ulint type;
|
|
ulint lenlen;
|
|
|
|
dtype = dfield_get_type(dfield);
|
|
|
|
type = dtype->mtype;
|
|
|
|
if (type == DATA_INT) {
|
|
/* Store integer data in Innobase in a big-endian format,
|
|
sign bit negated if the data is a signed integer. In MySQL,
|
|
integers are stored in a little-endian format. */
|
|
|
|
byte *p = buf + col_len;
|
|
|
|
for (;;) {
|
|
p--;
|
|
*p = *mysql_data;
|
|
if (p == buf) {
|
|
break;
|
|
}
|
|
mysql_data++;
|
|
}
|
|
|
|
if (!(dtype->prtype & DATA_UNSIGNED)) {
|
|
*buf ^= 128;
|
|
}
|
|
|
|
ptr = buf;
|
|
buf += col_len;
|
|
} else if ((type == DATA_VARCHAR || type == DATA_VARMYSQL ||
|
|
type == DATA_BINARY)) {
|
|
if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) {
|
|
/* The length of the actual data is stored to 1 or 2
|
|
bytes at the start of the field */
|
|
|
|
if (row_format_col) {
|
|
if (dtype->prtype & DATA_LONG_TRUE_VARCHAR) {
|
|
lenlen = 2;
|
|
} else {
|
|
lenlen = 1;
|
|
}
|
|
} else {
|
|
/* In a MySQL key value, lenlen is always 2 */
|
|
lenlen = 2;
|
|
}
|
|
|
|
ptr = row_mysql_read_true_varchar(&col_len, mysql_data, lenlen);
|
|
} else {
|
|
/* Remove trailing spaces from old style VARCHAR
|
|
columns. */
|
|
|
|
/* Handle Unicode strings differently. */
|
|
ulint mbminlen = dtype_get_mbminlen(dtype);
|
|
|
|
ptr = mysql_data;
|
|
|
|
switch (mbminlen) {
|
|
default:
|
|
ut_error;
|
|
case 4:
|
|
/* space=0x00000020 */
|
|
/* Trim "half-chars", just in case. */
|
|
col_len &= ~3;
|
|
|
|
while (col_len >= 4 && ptr[col_len - 4] == 0x00 &&
|
|
ptr[col_len - 3] == 0x00 && ptr[col_len - 2] == 0x00 &&
|
|
ptr[col_len - 1] == 0x20) {
|
|
col_len -= 4;
|
|
}
|
|
break;
|
|
case 2:
|
|
/* space=0x0020 */
|
|
/* Trim "half-chars", just in case. */
|
|
col_len &= ~1;
|
|
|
|
while (col_len >= 2 && ptr[col_len - 2] == 0x00 &&
|
|
ptr[col_len - 1] == 0x20) {
|
|
col_len -= 2;
|
|
}
|
|
break;
|
|
case 1:
|
|
/* space=0x20 */
|
|
while (col_len > 0 && ptr[col_len - 1] == 0x20) {
|
|
col_len--;
|
|
}
|
|
}
|
|
}
|
|
} else if (comp && type == DATA_MYSQL && dtype_get_mbminlen(dtype) == 1 &&
|
|
dtype_get_mbmaxlen(dtype) > 1) {
|
|
/* In some cases we strip trailing spaces from UTF-8 and other
|
|
multibyte charsets, from FIXED-length CHAR columns, to save
|
|
space. UTF-8 would otherwise normally use 3 * the string length
|
|
bytes to store an ASCII string! */
|
|
|
|
/* We assume that this CHAR field is encoded in a
|
|
variable-length character set where spaces have
|
|
1:1 correspondence to 0x20 bytes, such as UTF-8.
|
|
|
|
Consider a CHAR(n) field, a field of n characters.
|
|
It will contain between n * mbminlen and n * mbmaxlen bytes.
|
|
We will try to truncate it to n bytes by stripping
|
|
space padding. If the field contains single-byte
|
|
characters only, it will be truncated to n characters.
|
|
Consider a CHAR(5) field containing the string
|
|
".a " where "." denotes a 3-byte character represented
|
|
by the bytes "$%&". After our stripping, the string will
|
|
be stored as "$%&a " (5 bytes). The string
|
|
".abc " will be stored as "$%&abc" (6 bytes).
|
|
|
|
The space padding will be restored in row0sel.cc, function
|
|
row_sel_field_store_in_mysql_format(). */
|
|
|
|
ulint n_chars;
|
|
|
|
ut_a(!(dtype_get_len(dtype) % dtype_get_mbmaxlen(dtype)));
|
|
|
|
n_chars = dtype_get_len(dtype) / dtype_get_mbmaxlen(dtype);
|
|
|
|
/* Strip space padding. */
|
|
while (col_len > n_chars && ptr[col_len - 1] == 0x20) {
|
|
col_len--;
|
|
}
|
|
} else if (!row_format_col) {
|
|
/* if mysql data is from a MySQL key value
|
|
since the length is always stored in 2 bytes,
|
|
we need do nothing here. */
|
|
} else if (type == DATA_BLOB) {
|
|
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
|
|
} else if (DATA_GEOMETRY_MTYPE(type)) {
|
|
/* We use blob to store geometry data except DATA_POINT
|
|
internally, but in MySQL Layer the datatype is always blob. */
|
|
ptr = row_mysql_read_geometry(&col_len, mysql_data, col_len);
|
|
}
|
|
|
|
dfield_set_data(dfield, ptr, col_len);
|
|
|
|
return (buf);
|
|
}
|
|
|
|
/** Convert a row in the MySQL format to a row in the Innobase format. Note that
|
|
the function to convert a MySQL format key value to an InnoDB dtuple is
|
|
row_sel_convert_mysql_key_to_innobase() in row0sel.cc. */
|
|
static void row_mysql_convert_row_to_innobase(
|
|
dtuple_t *row, /*!< in/out: Innobase row where the
|
|
field type information is already
|
|
copied there! */
|
|
row_prebuilt_t *prebuilt, /*!< in: prebuilt struct where template
|
|
must be of type ROW_MYSQL_WHOLE_ROW */
|
|
const byte *mysql_rec, /*!< in: row in the MySQL format;
|
|
NOTE: do not discard as long as
|
|
row is used, as row may contain
|
|
pointers to this record! */
|
|
mem_heap_t **blob_heap) /*!< in: FIX_ME, remove this after
|
|
server fixes its issue */
|
|
{
|
|
const mysql_row_templ_t *templ;
|
|
dfield_t *dfield;
|
|
ulint i;
|
|
ulint n_col = 0;
|
|
ulint n_v_col = 0;
|
|
ulint n_m_v_col = 0;
|
|
|
|
ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
|
|
ut_ad(prebuilt->mysql_template);
|
|
|
|
for (i = 0; i < prebuilt->n_template; i++) {
|
|
bool is_multi_val = false;
|
|
|
|
templ = prebuilt->mysql_template + i;
|
|
|
|
if (templ->is_virtual) {
|
|
ut_ad(n_v_col < dtuple_get_n_v_fields(row));
|
|
dfield = dtuple_get_nth_v_field(row, n_v_col);
|
|
n_v_col++;
|
|
if (dfield_is_multi_value(dfield)) {
|
|
is_multi_val = true;
|
|
n_m_v_col++;
|
|
}
|
|
} else {
|
|
dfield = dtuple_get_nth_field(row, n_col);
|
|
n_col++;
|
|
}
|
|
|
|
if (templ->mysql_null_bit_mask != 0) {
|
|
/* Column may be SQL NULL */
|
|
|
|
if (mysql_rec[templ->mysql_null_byte_offset] &
|
|
(byte)(templ->mysql_null_bit_mask)) {
|
|
/* It is SQL NULL */
|
|
|
|
dfield_set_null(dfield);
|
|
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if (is_multi_val) {
|
|
dict_v_col_t *v_col =
|
|
dict_table_get_nth_v_col(prebuilt->table, n_v_col - 1);
|
|
|
|
innobase_get_multi_value(prebuilt->m_mysql_table, v_col->m_col.ind,
|
|
dfield, &prebuilt->mv_data[n_m_v_col - 1], 0,
|
|
dict_table_is_comp(prebuilt->table),
|
|
prebuilt->heap);
|
|
|
|
/* For multi-value data, the deep copy may cost too much.
|
|
So ideally this should be optimized by keeping and reading the
|
|
raw data. However, once more virtual column data needs to be
|
|
calculated later, for example, insert by modify, server will
|
|
overwrites the memory used here. So the safest way is a deep
|
|
copy. */
|
|
dfield_multi_value_dup(dfield, prebuilt->heap);
|
|
} else {
|
|
row_mysql_store_col_in_innobase_format(
|
|
dfield, prebuilt->ins_upd_rec_buff + templ->mysql_col_offset,
|
|
TRUE, /* MySQL row format data */
|
|
mysql_rec + templ->mysql_col_offset, templ->mysql_col_len,
|
|
dict_table_is_comp(prebuilt->table));
|
|
|
|
/* server has issue regarding handling BLOB virtual fields,
|
|
and we need to duplicate it with our own memory here */
|
|
if (templ->is_virtual &&
|
|
DATA_LARGE_MTYPE(dfield_get_type(dfield)->mtype)) {
|
|
if (*blob_heap == NULL) {
|
|
*blob_heap = mem_heap_create(dfield->len);
|
|
}
|
|
dfield_dup(dfield, *blob_heap);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If there is a FTS doc id column and it is not user supplied (
|
|
generated by server) then assign it a new doc id. */
|
|
if (prebuilt->table->fts) {
|
|
ut_a(prebuilt->table->fts->doc_col != ULINT_UNDEFINED);
|
|
|
|
fts_create_doc_id(prebuilt->table, row, prebuilt->heap);
|
|
}
|
|
}
|
|
|
|
/** Handles user errors and lock waits detected by the database engine.
|
|
@return true if it was a lock wait and we should continue running the
|
|
query thread and in that case the thr is ALREADY in the running state. */
|
|
bool row_mysql_handle_errors(
|
|
dberr_t *new_err, /*!< out: possible new error encountered in
|
|
lock wait, or if no new error, the value
|
|
of trx->error_state at the entry of this
|
|
function */
|
|
trx_t *trx, /*!< in: transaction */
|
|
que_thr_t *thr, /*!< in: query thread, or NULL */
|
|
trx_savept_t *savept) /*!< in: savepoint, or NULL */
|
|
{
|
|
dberr_t err;
|
|
|
|
handle_new_error:
|
|
err = trx->error_state;
|
|
|
|
ut_a(err != DB_SUCCESS);
|
|
|
|
trx->error_state = DB_SUCCESS;
|
|
|
|
switch (err) {
|
|
case DB_GP_WAIT_TIMEOUT:
|
|
case DB_LOCK_WAIT_TIMEOUT:
|
|
if (row_rollback_on_timeout) {
|
|
trx_rollback_to_savepoint(trx, NULL);
|
|
break;
|
|
}
|
|
/* fall through */
|
|
case DB_DUPLICATE_KEY:
|
|
case DB_FOREIGN_DUPLICATE_KEY:
|
|
case DB_TOO_BIG_RECORD:
|
|
case DB_UNDO_RECORD_TOO_BIG:
|
|
case DB_ROW_IS_REFERENCED:
|
|
case DB_NO_REFERENCED_ROW:
|
|
case DB_CANNOT_ADD_CONSTRAINT:
|
|
case DB_TOO_MANY_CONCURRENT_TRXS:
|
|
case DB_OUT_OF_FILE_SPACE:
|
|
case DB_READ_ONLY:
|
|
case DB_FTS_INVALID_DOCID:
|
|
case DB_INTERRUPTED:
|
|
case DB_CANT_CREATE_GEOMETRY_OBJECT:
|
|
case DB_COMPUTE_VALUE_FAILED:
|
|
case DB_LOCK_NOWAIT:
|
|
case DB_BTREE_LEVEL_LIMIT_EXCEEDED:
|
|
DBUG_EXECUTE_IF("row_mysql_crash_if_error", {
|
|
log_buffer_flush_to_disk();
|
|
DBUG_SUICIDE();
|
|
});
|
|
if (savept) {
|
|
/* Roll back the latest, possibly incomplete insertion
|
|
or update */
|
|
|
|
trx_rollback_to_savepoint(trx, savept);
|
|
}
|
|
/* MySQL will roll back the latest SQL statement */
|
|
break;
|
|
case DB_LOCK_WAIT:
|
|
|
|
trx_kill_blocking(trx);
|
|
DEBUG_SYNC_C("before_lock_wait_suspend");
|
|
|
|
lock_wait_suspend_thread(thr);
|
|
|
|
if (trx->error_state != DB_SUCCESS) {
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
goto handle_new_error;
|
|
}
|
|
|
|
*new_err = err;
|
|
|
|
return (true);
|
|
|
|
case DB_GP_WAIT:
|
|
lizard::gp_wait_suspend_thread(trx);
|
|
if (trx->error_state != DB_SUCCESS) {
|
|
ut_ad(trx->error_state == DB_GP_WAIT_TIMEOUT);
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
goto handle_new_error;
|
|
}
|
|
|
|
*new_err = err;
|
|
ut_ad(trx->error_state == DB_SUCCESS);
|
|
return true;
|
|
|
|
case DB_DEADLOCK:
|
|
case DB_LOCK_TABLE_FULL:
|
|
/* Roll back the whole transaction; this resolution was added
|
|
to version 3.23.43 */
|
|
|
|
trx_rollback_to_savepoint(trx, NULL);
|
|
break;
|
|
|
|
case DB_MUST_GET_MORE_FILE_SPACE:
|
|
ib::fatal(ER_IB_MSG_972)
|
|
<< "The database cannot continue operation because"
|
|
" of lack of space. You must add a new data file"
|
|
" to my.cnf and restart the database.";
|
|
break;
|
|
|
|
case DB_CORRUPTION:
|
|
ib::error(ER_IB_MSG_973)
|
|
<< "We detected index corruption in an InnoDB type"
|
|
" table. You have to dump + drop + reimport the"
|
|
" table or, in a case of widespread corruption,"
|
|
" dump all InnoDB tables and recreate the whole"
|
|
" tablespace. If the mysqld server crashes after"
|
|
" the startup or when you dump the tables. "
|
|
<< FORCE_RECOVERY_MSG;
|
|
break;
|
|
case DB_FOREIGN_EXCEED_MAX_CASCADE:
|
|
ib::error(ER_IB_MSG_974)
|
|
<< "Cannot delete/update rows with cascading"
|
|
" foreign key constraints that exceed max depth of "
|
|
<< FK_MAX_CASCADE_DEL
|
|
<< ". Please drop excessive"
|
|
" foreign constraints and try again";
|
|
break;
|
|
case DB_SNAPSHOT_OUT_OF_RANGE:
|
|
case DB_AS_OF_INTERNAL:
|
|
case DB_AS_OF_TABLE_DEF_CHANGED:
|
|
case DB_SNAPSHOT_TOO_OLD:
|
|
/** TODO: Check must be flashback query <16-09-20, zanye.zjy> */
|
|
break;
|
|
default:
|
|
ib::fatal(ER_IB_MSG_975)
|
|
<< "Unknown error code " << err << ": " << ut_strerr(err);
|
|
}
|
|
|
|
if (trx->error_state != DB_SUCCESS) {
|
|
*new_err = trx->error_state;
|
|
} else {
|
|
*new_err = err;
|
|
}
|
|
|
|
trx->error_state = DB_SUCCESS;
|
|
|
|
return (false);
|
|
}
|
|
|
|
/** Create a prebuilt struct for a MySQL table handle.
|
|
@return own: a prebuilt struct */
|
|
row_prebuilt_t *row_create_prebuilt(
|
|
dict_table_t *table, /*!< in: Innobase table handle */
|
|
ulint mysql_row_len) /*!< in: length in bytes of a row in
|
|
the MySQL format */
|
|
{
|
|
DBUG_TRACE;
|
|
|
|
row_prebuilt_t *prebuilt;
|
|
mem_heap_t *heap;
|
|
dict_index_t *clust_index;
|
|
dict_index_t *temp_index;
|
|
dtuple_t *ref;
|
|
ulint ref_len;
|
|
uint srch_key_len = 0;
|
|
ulint search_tuple_n_fields;
|
|
|
|
search_tuple_n_fields =
|
|
2 * (table->get_n_cols() + dict_table_get_n_v_cols(table));
|
|
|
|
clust_index = table->first_index();
|
|
|
|
/* Make sure that search_tuple is long enough for clustered index */
|
|
ut_a(2 * table->get_n_cols() >= clust_index->n_fields);
|
|
|
|
ref_len = dict_index_get_n_unique(clust_index);
|
|
|
|
/* Maximum size of the buffer needed for conversion of INTs from
|
|
little endian format to big endian format in an index. An index
|
|
can have maximum 16 columns (MAX_REF_PARTS) in it. Therfore
|
|
Max size for PK: 16 * 8 bytes (BIGINT's size) = 128 bytes
|
|
Max size Secondary index: 16 * 8 bytes + PK = 256 bytes. */
|
|
#define MAX_SRCH_KEY_VAL_BUFFER 2 * (8 * MAX_REF_PARTS)
|
|
|
|
#define PREBUILT_HEAP_INITIAL_SIZE \
|
|
(sizeof(*prebuilt) /* allocd in this function */ \
|
|
+ DTUPLE_EST_ALLOC(search_tuple_n_fields) /* search_tuple */ \
|
|
+ DTUPLE_EST_ALLOC(search_tuple_n_fields) /* m_stop_tuple */ \
|
|
+ DTUPLE_EST_ALLOC(ref_len) /* allocd in row_prebuild_sel_graph() */ \
|
|
+ sizeof(sel_node_t) + sizeof(que_fork_t) + \
|
|
sizeof(que_thr_t) /* allocd in row_get_prebuilt_update_vector() */ \
|
|
+ sizeof(upd_node_t) + sizeof(upd_t) + \
|
|
sizeof(upd_field_t) * table->get_n_cols() + sizeof(que_fork_t) + \
|
|
sizeof(que_thr_t) /* allocd in row_get_prebuilt_insert_row() */ \
|
|
+ sizeof(ins_node_t) /* mysql_row_len could be huge and we are not \
|
|
sure if this prebuilt instance is going to be \
|
|
used in inserts */ \
|
|
+ (mysql_row_len < 256 ? mysql_row_len : 0) + \
|
|
DTUPLE_EST_ALLOC(table->get_n_cols() + dict_table_get_n_v_cols(table)) + \
|
|
sizeof(que_fork_t) + sizeof(que_thr_t) + sizeof(*prebuilt->pcur) + \
|
|
sizeof(*prebuilt->parent) + sizeof(*prebuilt->sample) + \
|
|
sizeof(*prebuilt->clust_pcur))
|
|
|
|
/* Calculate size of key buffer used to store search key in
|
|
InnoDB format. MySQL stores INTs in little endian format and
|
|
InnoDB stores INTs in big endian format with the sign bit
|
|
flipped. All other field types are stored/compared the same
|
|
in MySQL and InnoDB, so we must create a buffer containing
|
|
the INT key parts in InnoDB format.We need two such buffers
|
|
since both start and end keys are used in records_in_range(). */
|
|
|
|
for (temp_index = table->first_index(); temp_index;
|
|
temp_index = temp_index->next()) {
|
|
DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value",
|
|
ut_a(temp_index->n_user_defined_cols == MAX_REF_PARTS););
|
|
uint temp_len = 0;
|
|
for (uint i = 0; i < temp_index->n_uniq; i++) {
|
|
ulint type = temp_index->fields[i].col->mtype;
|
|
if (type == DATA_INT) {
|
|
temp_len += temp_index->fields[i].fixed_len;
|
|
}
|
|
}
|
|
srch_key_len = std::max(srch_key_len, temp_len);
|
|
}
|
|
|
|
ut_a(srch_key_len <= MAX_SRCH_KEY_VAL_BUFFER);
|
|
|
|
DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value",
|
|
ut_a(srch_key_len == MAX_SRCH_KEY_VAL_BUFFER););
|
|
|
|
/* We allocate enough space for the objects that are likely to
|
|
be created later in order to minimize the number of malloc()
|
|
calls */
|
|
heap = mem_heap_create(PREBUILT_HEAP_INITIAL_SIZE + 2 * srch_key_len);
|
|
|
|
prebuilt =
|
|
static_cast<row_prebuilt_t *>(mem_heap_zalloc(heap, sizeof(*prebuilt)));
|
|
|
|
prebuilt->magic_n = ROW_PREBUILT_ALLOCATED;
|
|
prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED;
|
|
|
|
prebuilt->table = table;
|
|
|
|
prebuilt->sql_stat_start = TRUE;
|
|
prebuilt->heap = heap;
|
|
|
|
prebuilt->srch_key_val_len = srch_key_len;
|
|
if (prebuilt->srch_key_val_len) {
|
|
prebuilt->srch_key_val1 = static_cast<byte *>(
|
|
mem_heap_alloc(prebuilt->heap, 2 * prebuilt->srch_key_val_len));
|
|
prebuilt->srch_key_val2 =
|
|
prebuilt->srch_key_val1 + prebuilt->srch_key_val_len;
|
|
} else {
|
|
prebuilt->srch_key_val1 = NULL;
|
|
prebuilt->srch_key_val2 = NULL;
|
|
}
|
|
|
|
prebuilt->pcur = static_cast<btr_pcur_t *>(
|
|
mem_heap_zalloc(prebuilt->heap, sizeof(btr_pcur_t)));
|
|
prebuilt->parent = static_cast<btr_pcur_t *>(
|
|
mem_heap_zalloc(prebuilt->heap, sizeof(btr_pcur_t)));
|
|
prebuilt->clust_pcur = static_cast<btr_pcur_t *>(
|
|
mem_heap_zalloc(prebuilt->heap, sizeof(btr_pcur_t)));
|
|
|
|
void *ptr =
|
|
mem_heap_zalloc(prebuilt->heap, sizeof(lizard::Cleanout_pages) * 2 +
|
|
sizeof(lizard::Cleanout_cursors) * 2);
|
|
|
|
prebuilt->pcur->m_cleanout_pages = static_cast<lizard::Cleanout_pages *>(ptr);
|
|
new (prebuilt->pcur->m_cleanout_pages) lizard::Cleanout_pages();
|
|
|
|
prebuilt->clust_pcur->m_cleanout_pages = prebuilt->pcur->m_cleanout_pages + 1;
|
|
new (prebuilt->clust_pcur->m_cleanout_pages) lizard::Cleanout_pages();
|
|
|
|
prebuilt->pcur->m_cleanout_cursors =
|
|
reinterpret_cast<lizard::Cleanout_cursors *>(
|
|
prebuilt->clust_pcur->m_cleanout_pages + 1);
|
|
|
|
new (prebuilt->pcur->m_cleanout_cursors) lizard::Cleanout_cursors();
|
|
|
|
prebuilt->clust_pcur->m_cleanout_cursors =
|
|
prebuilt->pcur->m_cleanout_cursors + 1;
|
|
new (prebuilt->clust_pcur->m_cleanout_cursors) lizard::Cleanout_cursors();
|
|
|
|
btr_pcur_reset(prebuilt->pcur);
|
|
btr_pcur_reset(prebuilt->parent);
|
|
btr_pcur_reset(prebuilt->clust_pcur);
|
|
|
|
prebuilt->sample = static_cast<btr_sample_t *>(
|
|
mem_heap_zalloc(prebuilt->heap, sizeof(btr_sample_t)));
|
|
prebuilt->sample->init(prebuilt);
|
|
|
|
prebuilt->select_lock_type = LOCK_NONE;
|
|
prebuilt->select_mode = SELECT_ORDINARY;
|
|
|
|
prebuilt->search_tuple = dtuple_create(heap, search_tuple_n_fields);
|
|
prebuilt->m_stop_tuple = dtuple_create(heap, search_tuple_n_fields);
|
|
ut_ad(!prebuilt->m_stop_tuple_found);
|
|
ut_ad(!prebuilt->is_reading_range());
|
|
|
|
ref = dtuple_create(heap, ref_len);
|
|
|
|
dict_index_copy_types(ref, clust_index, ref_len);
|
|
|
|
prebuilt->clust_ref = ref;
|
|
|
|
prebuilt->autoinc_error = DB_SUCCESS;
|
|
prebuilt->autoinc_offset = 0;
|
|
|
|
/* Default to 1, we will set the actual value later in
|
|
ha_innobase::get_auto_increment(). */
|
|
prebuilt->autoinc_increment = 1;
|
|
|
|
prebuilt->autoinc_last_value = 0;
|
|
|
|
/* During UPDATE and DELETE we need the doc id. */
|
|
prebuilt->fts_doc_id = 0;
|
|
|
|
prebuilt->mysql_row_len = mysql_row_len;
|
|
|
|
prebuilt->ins_sel_stmt = false;
|
|
prebuilt->session = NULL;
|
|
|
|
prebuilt->fts_doc_id_in_read_set = 0;
|
|
prebuilt->blob_heap = NULL;
|
|
|
|
prebuilt->skip_serializable_dd_view = false;
|
|
prebuilt->no_autoinc_locking = false;
|
|
|
|
prebuilt->m_no_prefetch = false;
|
|
prebuilt->m_read_virtual_key = false;
|
|
|
|
prebuilt->m_asof_query.reset();
|
|
|
|
return prebuilt;
|
|
}
|
|
|
|
/** Free a prebuilt struct for a MySQL table handle. */
|
|
void row_prebuilt_free(
|
|
row_prebuilt_t *prebuilt, /*!< in, own: prebuilt struct */
|
|
ibool dict_locked) /*!< in: TRUE=data dictionary locked */
|
|
{
|
|
DBUG_TRACE;
|
|
|
|
ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
|
|
ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
|
|
|
|
prebuilt->magic_n = ROW_PREBUILT_FREED;
|
|
prebuilt->magic_n2 = ROW_PREBUILT_FREED;
|
|
|
|
/* It is better to fail here on assertion, than to let the destructor of the
|
|
active row_is_reading_range_guard_t modify some random place in memory. */
|
|
ut_a(!prebuilt->is_reading_range());
|
|
|
|
btr_pcur_reset(prebuilt->pcur);
|
|
btr_pcur_reset(prebuilt->parent);
|
|
btr_pcur_reset(prebuilt->clust_pcur);
|
|
|
|
prebuilt->pcur->m_cleanout_pages->~Cleanout_pages();
|
|
prebuilt->clust_pcur->m_cleanout_pages->~Cleanout_pages();
|
|
prebuilt->pcur->m_cleanout_pages = nullptr;
|
|
prebuilt->clust_pcur->m_cleanout_pages = nullptr;
|
|
|
|
prebuilt->pcur->m_cleanout_cursors->~Cleanout_cursors();
|
|
prebuilt->clust_pcur->m_cleanout_cursors->~Cleanout_cursors();
|
|
prebuilt->pcur->m_cleanout_cursors = nullptr;
|
|
prebuilt->clust_pcur->m_cleanout_cursors = nullptr;
|
|
|
|
ut_free(prebuilt->mysql_template);
|
|
|
|
if (prebuilt->ins_graph) {
|
|
que_graph_free_recursive(prebuilt->ins_graph);
|
|
}
|
|
|
|
if (prebuilt->sel_graph) {
|
|
que_graph_free_recursive(prebuilt->sel_graph);
|
|
}
|
|
|
|
if (prebuilt->upd_graph) {
|
|
que_graph_free_recursive(prebuilt->upd_graph);
|
|
}
|
|
|
|
if (prebuilt->blob_heap) {
|
|
row_mysql_prebuilt_free_blob_heap(prebuilt);
|
|
}
|
|
|
|
if (prebuilt->old_vers_heap) {
|
|
mem_heap_free(prebuilt->old_vers_heap);
|
|
}
|
|
|
|
if (prebuilt->fetch_cache[0] != NULL) {
|
|
byte *base = prebuilt->fetch_cache[0] - 4;
|
|
byte *ptr = base;
|
|
|
|
for (ulint i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
|
|
ulint magic1 = mach_read_from_4(ptr);
|
|
ut_a(magic1 == ROW_PREBUILT_FETCH_MAGIC_N);
|
|
ptr += 4;
|
|
|
|
byte *row = ptr;
|
|
ut_a(row == prebuilt->fetch_cache[i]);
|
|
ptr += prebuilt->mysql_row_len;
|
|
|
|
ulint magic2 = mach_read_from_4(ptr);
|
|
ut_a(magic2 == ROW_PREBUILT_FETCH_MAGIC_N);
|
|
ptr += 4;
|
|
}
|
|
|
|
ut_free(base);
|
|
}
|
|
|
|
if (prebuilt->rtr_info) {
|
|
rtr_clean_rtr_info(prebuilt->rtr_info, true);
|
|
}
|
|
|
|
if (prebuilt->table) {
|
|
ut_ad(!prebuilt->table->is_fts_aux());
|
|
dd_table_close(prebuilt->table, NULL, NULL, dict_locked);
|
|
}
|
|
|
|
prebuilt->m_lob_undo.destroy();
|
|
|
|
mem_heap_free(prebuilt->heap);
|
|
}
|
|
|
|
/** Points a prebuilt struct, and every query graph already built in it, at
the given transaction.
@param[in,out] prebuilt prebuilt struct in MySQL handle
@param[in] trx transaction to attach */
void row_update_prebuilt_trx(row_prebuilt_t *prebuilt, trx_t *trx) {
  ut_a(trx->magic_n == TRX_MAGIC_N);
  ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
  ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);

  prebuilt->trx = trx;

  /* Only graphs that have been created so far need to be re-pointed. */
  que_fork_t *const graphs[] = {prebuilt->ins_graph, prebuilt->upd_graph,
                                prebuilt->sel_graph};

  for (que_fork_t *graph : graphs) {
    if (graph) {
      graph->trx = trx;
    }
  }
}
|
|
|
|
/** Gets pointer to a prebuilt dtuple used in insertions. If the insert graph
|
|
has not yet been built in the prebuilt struct, then this function first
|
|
builds it.
|
|
@return prebuilt dtuple; the column type information is also set in it */
|
|
static dtuple_t *row_get_prebuilt_insert_row(
|
|
row_prebuilt_t *prebuilt) /*!< in: prebuilt struct in MySQL
|
|
handle */
|
|
{
|
|
dict_table_t *table = prebuilt->table;
|
|
|
|
ut_ad(prebuilt && table && prebuilt->trx);
|
|
|
|
if (prebuilt->ins_node != 0) {
|
|
prebuilt->ins_node->ins_multi_val_pos = 0;
|
|
|
|
/* Check if indexes have been dropped or added and we
|
|
may need to rebuild the row insert template. */
|
|
|
|
if (prebuilt->trx_id == table->def_trx_id &&
|
|
UT_LIST_GET_LEN(prebuilt->ins_node->entry_list) ==
|
|
UT_LIST_GET_LEN(table->indexes)) {
|
|
return (prebuilt->ins_node->row);
|
|
}
|
|
|
|
ut_ad(prebuilt->trx_id < table->def_trx_id);
|
|
|
|
que_graph_free_recursive(prebuilt->ins_graph);
|
|
|
|
prebuilt->ins_graph = 0;
|
|
}
|
|
|
|
/* Create an insert node and query graph to the prebuilt struct */
|
|
|
|
ins_node_t *node;
|
|
|
|
node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
|
|
|
|
prebuilt->ins_node = node;
|
|
|
|
if (prebuilt->ins_upd_rec_buff == 0) {
|
|
prebuilt->ins_upd_rec_buff = static_cast<byte *>(
|
|
mem_heap_alloc(prebuilt->heap, prebuilt->mysql_row_len));
|
|
}
|
|
|
|
if (table->n_m_v_cols > 0 && prebuilt->mv_data == nullptr) {
|
|
prebuilt->mv_data = static_cast<multi_value_data *>(mem_heap_zalloc(
|
|
prebuilt->heap, table->n_m_v_cols * sizeof(*prebuilt->mv_data)));
|
|
for (ulint i = 0; i < table->n_m_v_cols; i++) {
|
|
prebuilt->mv_data[i].alloc(multi_value_data::s_default_allocate_num,
|
|
false, prebuilt->heap);
|
|
}
|
|
}
|
|
|
|
dtuple_t *row;
|
|
|
|
row = dtuple_create_with_vcol(prebuilt->heap, table->get_n_cols(),
|
|
dict_table_get_n_v_cols(table));
|
|
|
|
dict_table_copy_types(row, table);
|
|
|
|
ins_node_set_new_row(node, row);
|
|
|
|
prebuilt->ins_graph = static_cast<que_fork_t *>(
|
|
que_node_get_parent(pars_complete_graph_for_exec(
|
|
node, prebuilt->trx, prebuilt->heap, prebuilt)));
|
|
|
|
prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
|
|
|
|
prebuilt->trx_id = table->def_trx_id;
|
|
|
|
return (prebuilt->ins_node->row);
|
|
}
|
|
|
|
/** Updates the table modification counter and calculates new estimates
|
|
for table and index statistics if necessary. */
|
|
UNIV_INLINE
|
|
void row_update_statistics_if_needed(dict_table_t *table) /*!< in: table */
|
|
{
|
|
ib_uint64_t counter;
|
|
ib_uint64_t n_rows;
|
|
|
|
if (!table->stat_initialized) {
|
|
DBUG_EXECUTE_IF("test_upd_stats_if_needed_not_inited",
|
|
fprintf(stderr,
|
|
"test_upd_stats_if_needed_not_inited"
|
|
" was executed\n"););
|
|
return;
|
|
}
|
|
|
|
counter = table->stat_modified_counter++;
|
|
n_rows = dict_table_get_n_rows(table);
|
|
|
|
if (dict_stats_is_persistent_enabled(table)) {
|
|
if (counter > n_rows / 10 /* 10% */
|
|
&& dict_stats_auto_recalc_is_enabled(table)) {
|
|
dict_stats_recalc_pool_add(table);
|
|
table->stat_modified_counter = 0;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/* Calculate new statistics if 1 / 16 of table has been modified
|
|
since the last time a statistics batch was run.
|
|
We calculate statistics at most every 16th round, since we may have
|
|
a counter table which is very small and updated very often. */
|
|
|
|
if (counter > 16 + n_rows / 16 /* 6.25% */) {
|
|
ut_ad(!mutex_own(&dict_sys->mutex));
|
|
/* this will reset table->stat_modified_counter to 0 */
|
|
dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT);
|
|
}
|
|
}
|
|
|
|
/** Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
|
|
AUTO_INC lock gives exclusive access to the auto-inc counter of the
|
|
table. The lock is reserved only for the duration of an SQL statement.
|
|
It is not compatible with another AUTO_INC or exclusive lock on the
|
|
table.
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_lock_table_autoinc_for_mysql(
|
|
row_prebuilt_t *prebuilt) /*!< in: prebuilt struct in the MySQL
|
|
table handle */
|
|
{
|
|
trx_t *trx = prebuilt->trx;
|
|
ins_node_t *node = prebuilt->ins_node;
|
|
const dict_table_t *table = prebuilt->table;
|
|
que_thr_t *thr;
|
|
dberr_t err;
|
|
ibool was_lock_wait;
|
|
|
|
/* If we already hold an AUTOINC lock on the table then do nothing.
|
|
Note: We peek at the value of the current owner without acquiring
|
|
the lock mutex. */
|
|
if (trx == table->autoinc_trx) {
|
|
return (DB_SUCCESS);
|
|
}
|
|
|
|
trx->op_info = "setting auto-inc lock";
|
|
|
|
row_get_prebuilt_insert_row(prebuilt);
|
|
node = prebuilt->ins_node;
|
|
|
|
/* We use the insert query graph as the dummy graph needed
|
|
in the lock module call */
|
|
|
|
thr = que_fork_get_first_thr(prebuilt->ins_graph);
|
|
|
|
que_thr_move_to_run_state_for_mysql(thr, trx);
|
|
|
|
run_again:
|
|
thr->run_node = node;
|
|
thr->prev_node = node;
|
|
|
|
/* It may be that the current session has not yet started
|
|
its transaction, or it has been committed: */
|
|
|
|
trx_start_if_not_started_xa(trx, true);
|
|
|
|
err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
|
|
|
|
trx->error_state = err;
|
|
|
|
if (err != DB_SUCCESS) {
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
|
|
|
|
if (was_lock_wait) {
|
|
goto run_again;
|
|
}
|
|
|
|
trx->op_info = "";
|
|
|
|
return (err);
|
|
}
|
|
|
|
que_thr_stop_for_mysql_no_error(thr, trx);
|
|
|
|
trx->op_info = "";
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Sets a table lock on the table mentioned in prebuilt.
|
|
@param[in] prebuilt table handle
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_lock_table(row_prebuilt_t *prebuilt) {
|
|
trx_t *trx = prebuilt->trx;
|
|
que_thr_t *thr;
|
|
dberr_t err;
|
|
ibool was_lock_wait;
|
|
|
|
trx->op_info = "setting table lock";
|
|
|
|
if (prebuilt->sel_graph == NULL) {
|
|
/* Build a dummy select query graph */
|
|
row_prebuild_sel_graph(prebuilt);
|
|
}
|
|
|
|
/* We use the select query graph as the dummy graph needed
|
|
in the lock module call */
|
|
|
|
thr = que_fork_get_first_thr(prebuilt->sel_graph);
|
|
|
|
que_thr_move_to_run_state_for_mysql(thr, trx);
|
|
|
|
run_again:
|
|
thr->run_node = thr;
|
|
thr->prev_node = thr->common.parent;
|
|
|
|
/* It may be that the current session has not yet started
|
|
its transaction, or it has been committed: */
|
|
|
|
trx_start_if_not_started_xa(trx, false);
|
|
|
|
err =
|
|
lock_table(0, prebuilt->table,
|
|
static_cast<enum lock_mode>(prebuilt->select_lock_type), thr);
|
|
|
|
trx->error_state = err;
|
|
|
|
if (err != DB_SUCCESS) {
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
|
|
|
|
if (was_lock_wait) {
|
|
goto run_again;
|
|
}
|
|
|
|
trx->op_info = "";
|
|
|
|
return (err);
|
|
}
|
|
|
|
que_thr_stop_for_mysql_no_error(thr, trx);
|
|
|
|
trx->op_info = "";
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Perform explicit rollback in absence of UNDO logs.
|
|
@param[in] index apply rollback action on this index
|
|
@param[in] entry entry to remove/rollback.
|
|
@param[in,out] thr thread handler.
|
|
@param[in,out] mtr mini transaction.
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_explicit_rollback(dict_index_t *index, const dtuple_t *entry,
|
|
que_thr_t *thr, mtr_t *mtr) {
|
|
btr_cur_t cursor;
|
|
ulint flags;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint *offsets;
|
|
mem_heap_t *heap = NULL;
|
|
dberr_t err;
|
|
|
|
rec_offs_init(offsets_);
|
|
flags = BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG;
|
|
|
|
btr_cur_search_to_nth_level_with_no_latch(index, 0, entry, PAGE_CUR_LE,
|
|
&cursor, __FILE__, __LINE__, mtr);
|
|
|
|
offsets = rec_get_offsets(btr_cur_get_rec(&cursor), index, offsets_,
|
|
ULINT_UNDEFINED, &heap);
|
|
|
|
if (index->is_clustered()) {
|
|
err = btr_cur_del_mark_set_clust_rec(flags, btr_cur_get_block(&cursor),
|
|
btr_cur_get_rec(&cursor), index,
|
|
offsets, thr, entry, mtr);
|
|
} else {
|
|
err = btr_cur_del_mark_set_sec_rec(flags, &cursor, TRUE, thr, mtr);
|
|
}
|
|
ut_ad(err == DB_SUCCESS);
|
|
|
|
/* Void call just to set mtr modification flag
|
|
to true failing which block is not scheduled for flush*/
|
|
byte *log_ptr = mlog_open(mtr, 0);
|
|
ut_ad(log_ptr == NULL);
|
|
if (log_ptr != NULL) {
|
|
/* To keep complier happy. */
|
|
mlog_close(mtr, log_ptr);
|
|
}
|
|
|
|
if (heap != NULL) {
|
|
mem_heap_free(heap);
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Convert a row in the MySQL format to a row in the Innobase format.
|
|
This is specialized function used for intrinsic table with reduce branching.
|
|
@param[in,out] row row where field values are copied.
|
|
@param[in] prebuilt prebuilt handler
|
|
@param[in] mysql_rec row in mysql format. */
|
|
static void row_mysql_to_innobase(dtuple_t *row, row_prebuilt_t *prebuilt,
|
|
const byte *mysql_rec) {
|
|
ut_ad(prebuilt->table->is_intrinsic());
|
|
|
|
const byte *ptr = mysql_rec;
|
|
|
|
for (ulint i = 0; i < prebuilt->n_template; i++) {
|
|
const mysql_row_templ_t *templ;
|
|
dfield_t *dfield;
|
|
|
|
templ = prebuilt->mysql_template + i;
|
|
dfield = dtuple_get_nth_field(row, i);
|
|
|
|
/* Check if column has null value. */
|
|
if (templ->mysql_null_bit_mask != 0) {
|
|
if (mysql_rec[templ->mysql_null_byte_offset] &
|
|
(byte)(templ->mysql_null_bit_mask)) {
|
|
dfield_set_null(dfield);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* Extract the column value. */
|
|
ptr = mysql_rec + templ->mysql_col_offset;
|
|
const dtype_t *dtype = dfield_get_type(dfield);
|
|
ulint col_len = templ->mysql_col_len;
|
|
|
|
ut_ad(dtype->mtype == DATA_INT || dtype->mtype == DATA_CHAR ||
|
|
dtype->mtype == DATA_MYSQL || dtype->mtype == DATA_VARCHAR ||
|
|
dtype->mtype == DATA_VARMYSQL || dtype->mtype == DATA_BINARY ||
|
|
dtype->mtype == DATA_FIXBINARY || dtype->mtype == DATA_FLOAT ||
|
|
dtype->mtype == DATA_DOUBLE || dtype->mtype == DATA_DECIMAL ||
|
|
dtype->mtype == DATA_BLOB || dtype->mtype == DATA_GEOMETRY ||
|
|
dtype->mtype == DATA_POINT || dtype->mtype == DATA_VAR_POINT);
|
|
|
|
#ifdef UNIV_DEBUG
|
|
if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) {
|
|
ut_ad(templ->mysql_length_bytes > 0);
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
/* For now varchar field this has to be always 0 so
|
|
memcpy of 0 bytes shouldn't affect the original col_len. */
|
|
if (dtype->mtype == DATA_INT) {
|
|
/* Convert and Store in big-endian. */
|
|
byte *buf = prebuilt->ins_upd_rec_buff + templ->mysql_col_offset;
|
|
byte *copy_to = buf + col_len;
|
|
for (;;) {
|
|
copy_to--;
|
|
*copy_to = *ptr;
|
|
if (copy_to == buf) {
|
|
break;
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
if (!(dtype->prtype & DATA_UNSIGNED)) {
|
|
*buf ^= 128;
|
|
}
|
|
|
|
ptr = buf;
|
|
buf += col_len;
|
|
} else if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) {
|
|
ut_ad(dtype->mtype == DATA_VARCHAR || dtype->mtype == DATA_VARMYSQL ||
|
|
dtype->mtype == DATA_BINARY);
|
|
|
|
col_len = 0;
|
|
row_mysql_read_true_varchar(&col_len, ptr, templ->mysql_length_bytes);
|
|
ptr += templ->mysql_length_bytes;
|
|
} else if (dtype->mtype == DATA_BLOB) {
|
|
ptr = row_mysql_read_blob_ref(&col_len, ptr, col_len);
|
|
} else if (DATA_GEOMETRY_MTYPE(dtype->mtype)) {
|
|
/* Point, Var-Point, Geometry */
|
|
ptr = row_mysql_read_geometry(&col_len, ptr, col_len);
|
|
}
|
|
|
|
dfield_set_data(dfield, ptr, col_len);
|
|
}
|
|
}
|
|
|
|
/** Does an insert for MySQL using cursor interface.
|
|
Cursor interface is low level interface that directly interacts at
|
|
Storage Level by-passing all the locking and transaction semantics.
|
|
For InnoDB case, this will also by-pass hidden column generation.
|
|
@param[in] mysql_rec row in the MySQL format
|
|
@param[in,out] prebuilt prebuilt struct in MySQL handle
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_insert_for_mysql_using_cursor(const byte *mysql_rec,
|
|
row_prebuilt_t *prebuilt) {
|
|
dberr_t err = DB_SUCCESS;
|
|
ins_node_t *node = NULL;
|
|
que_thr_t *thr = NULL;
|
|
mtr_t mtr;
|
|
|
|
/* Step-1: Get the reference of row to insert. */
|
|
row_get_prebuilt_insert_row(prebuilt);
|
|
node = prebuilt->ins_node;
|
|
thr = que_fork_get_first_thr(prebuilt->ins_graph);
|
|
|
|
/* Step-2: Convert row from MySQL row format to InnoDB row format. */
|
|
row_mysql_to_innobase(node->row, prebuilt, mysql_rec);
|
|
|
|
/* Step-3: Append row-id index is not unique. */
|
|
dict_index_t *clust_index = node->table->first_index();
|
|
|
|
assert_lizard_dict_table_check(node->table);
|
|
|
|
if (!dict_index_is_unique(clust_index)) {
|
|
dict_sys_write_row_id(node->row_id_buf,
|
|
dict_table_get_next_table_sess_row_id(node->table));
|
|
}
|
|
|
|
trx_write_trx_id(node->trx_id_buf,
|
|
dict_table_get_next_table_sess_trx_id(node->table));
|
|
|
|
/* Step-4: Iterate over all the indexes and insert entries. */
|
|
dict_index_t *inserted_upto = NULL;
|
|
node->entry = UT_LIST_GET_FIRST(node->entry_list);
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(node->table->indexes);
|
|
index != NULL; index = UT_LIST_GET_NEXT(indexes, index),
|
|
node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry)) {
|
|
node->index = index;
|
|
err = row_ins_index_entry_set_vals(node->index, node->entry, node->row);
|
|
if (err != DB_SUCCESS) {
|
|
break;
|
|
}
|
|
|
|
if (index->is_clustered()) {
|
|
err = row_ins_clust_index_entry(node->index, node->entry, thr, 0, false);
|
|
} else {
|
|
err = row_ins_sec_index_entry(node->index, node->entry, thr, false);
|
|
}
|
|
|
|
if (err == DB_SUCCESS) {
|
|
inserted_upto = index;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Step-5: If error is encountered while inserting entries to any
|
|
of the index then entries inserted to previous indexes are removed
|
|
explicity. Automatic rollback is not in action as UNDO logs are
|
|
turned-off. */
|
|
if (err != DB_SUCCESS) {
|
|
node->entry = UT_LIST_GET_FIRST(node->entry_list);
|
|
|
|
mtr_start(&mtr);
|
|
dict_disable_redo_if_temporary(node->table, &mtr);
|
|
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(node->table->indexes);
|
|
inserted_upto != NULL; index = UT_LIST_GET_NEXT(indexes, index),
|
|
node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry)) {
|
|
row_explicit_rollback(index, node->entry, thr, &mtr);
|
|
|
|
if (index == inserted_upto) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
mtr_commit(&mtr);
|
|
} else {
|
|
/* Not protected by dict_table_stats_lock() for performance
|
|
reasons, we would rather get garbage in stat_n_rows (which is
|
|
just an estimate anyway) than protecting the following code
|
|
, with a latch. */
|
|
dict_table_n_rows_inc(node->table);
|
|
|
|
srv_stats.n_rows_inserted.inc();
|
|
}
|
|
|
|
thr_get_trx(thr)->error_state = DB_SUCCESS;
|
|
return (err);
|
|
}
|
|
|
|
/** Does an insert for MySQL using INSERT graph. This function will run/execute
|
|
INSERT graph.
|
|
@param[in] mysql_rec row in the MySQL format
|
|
@param[in,out] prebuilt prebuilt struct in MySQL handle
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_insert_for_mysql_using_ins_graph(const byte *mysql_rec,
|
|
row_prebuilt_t *prebuilt) {
|
|
trx_savept_t savept;
|
|
que_thr_t *thr;
|
|
dberr_t err;
|
|
ibool was_lock_wait;
|
|
trx_t *trx = prebuilt->trx;
|
|
ins_node_t *node = prebuilt->ins_node;
|
|
dict_table_t *table = prebuilt->table;
|
|
/* FIX_ME: This blob heap is used to compensate an issue in server
|
|
for virtual column blob handling */
|
|
mem_heap_t *blob_heap = NULL;
|
|
|
|
ut_ad(trx);
|
|
ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
|
|
ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
|
|
|
|
assert_lizard_dict_table_check(prebuilt->table);
|
|
|
|
if (dict_table_is_discarded(prebuilt->table)) {
|
|
ib::error(ER_IB_MSG_976)
|
|
<< "The table " << prebuilt->table->name
|
|
<< " doesn't have a corresponding tablespace, it was"
|
|
" discarded.";
|
|
|
|
return (DB_TABLESPACE_DELETED);
|
|
|
|
} else if (prebuilt->table->ibd_file_missing) {
|
|
ib::error(ER_IB_MSG_977)
|
|
<< ".ibd file is missing for table " << prebuilt->table->name;
|
|
|
|
return (DB_TABLESPACE_NOT_FOUND);
|
|
|
|
} else if (srv_force_recovery &&
|
|
!(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN &&
|
|
dict_sys_t::is_dd_table_id(prebuilt->table->id))) {
|
|
/* Allow to modify hardcoded DD tables in some scenario to
|
|
make DDL work */
|
|
|
|
ib::error(ER_IB_MSG_978) << MODIFICATIONS_NOT_ALLOWED_MSG_FORCE_RECOVERY;
|
|
|
|
return (DB_READ_ONLY);
|
|
}
|
|
|
|
DBUG_EXECUTE_IF("mark_table_corrupted", {
|
|
/* Mark the table corrupted for the clustered index */
|
|
dict_index_t *index = table->first_index();
|
|
ut_ad(index->is_clustered());
|
|
dict_set_corrupted(index);
|
|
});
|
|
|
|
if (table->is_corrupted()) {
|
|
ib::error(ER_IB_MSG_979) << "Table " << table->name << " is corrupt.";
|
|
return (DB_TABLE_CORRUPT);
|
|
}
|
|
|
|
trx->op_info = "inserting";
|
|
|
|
row_mysql_delay_if_needed();
|
|
|
|
trx_start_if_not_started_xa(trx, true);
|
|
|
|
row_get_prebuilt_insert_row(prebuilt);
|
|
node = prebuilt->ins_node;
|
|
|
|
row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec, &blob_heap);
|
|
|
|
savept = trx_savept_take(trx);
|
|
|
|
thr = que_fork_get_first_thr(prebuilt->ins_graph);
|
|
|
|
if (prebuilt->sql_stat_start) {
|
|
node->state = INS_NODE_SET_IX_LOCK;
|
|
prebuilt->sql_stat_start = FALSE;
|
|
} else {
|
|
node->state = INS_NODE_ALLOC_ROW_ID;
|
|
}
|
|
|
|
que_thr_move_to_run_state_for_mysql(thr, trx);
|
|
|
|
run_again:
|
|
thr->run_node = node;
|
|
thr->prev_node = node;
|
|
|
|
row_ins_step(thr);
|
|
|
|
DEBUG_SYNC_C("ib_after_row_insert_step");
|
|
|
|
err = trx->error_state;
|
|
|
|
if (err != DB_SUCCESS) {
|
|
error_exit:
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
/* FIXME: What's this ? */
|
|
thr->lock_state = QUE_THR_LOCK_ROW;
|
|
|
|
was_lock_wait = row_mysql_handle_errors(&err, trx, thr, &savept);
|
|
|
|
thr->lock_state = QUE_THR_LOCK_NOLOCK;
|
|
|
|
if (was_lock_wait) {
|
|
ut_ad(node->state == INS_NODE_INSERT_ENTRIES ||
|
|
node->state == INS_NODE_ALLOC_ROW_ID);
|
|
goto run_again;
|
|
}
|
|
|
|
trx->op_info = "";
|
|
|
|
if (blob_heap != NULL) {
|
|
mem_heap_free(blob_heap);
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
if (dict_table_has_fts_index(table)) {
|
|
doc_id_t doc_id;
|
|
|
|
/* Extract the doc id from the hidden FTS column */
|
|
doc_id = fts_get_doc_id_from_row(table, node->row);
|
|
|
|
if (doc_id <= 0) {
|
|
ib::error(ER_IB_MSG_980) << "FTS Doc ID must be large than 0";
|
|
err = DB_FTS_INVALID_DOCID;
|
|
trx->error_state = DB_FTS_INVALID_DOCID;
|
|
goto error_exit;
|
|
}
|
|
|
|
if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
|
|
doc_id_t next_doc_id = table->fts->cache->next_doc_id;
|
|
|
|
if (doc_id < next_doc_id) {
|
|
ib::error(ER_IB_MSG_981)
|
|
<< "FTS Doc ID must be large than " << next_doc_id - 1
|
|
<< " for table " << table->name;
|
|
|
|
err = DB_FTS_INVALID_DOCID;
|
|
trx->error_state = DB_FTS_INVALID_DOCID;
|
|
goto error_exit;
|
|
}
|
|
|
|
/* Difference between Doc IDs are restricted within
|
|
4 bytes integer. See fts_get_encoded_len(). Consecutive
|
|
doc_ids difference should not exceed
|
|
FTS_DOC_ID_MAX_STEP value. */
|
|
|
|
if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
|
|
ib::error(ER_IB_MSG_982) << "Doc ID " << doc_id
|
|
<< " is too big. Its difference with"
|
|
" largest used Doc ID "
|
|
<< next_doc_id - 1
|
|
<< " cannot"
|
|
" exceed or equal to "
|
|
<< FTS_DOC_ID_MAX_STEP;
|
|
err = DB_FTS_INVALID_DOCID;
|
|
trx->error_state = DB_FTS_INVALID_DOCID;
|
|
goto error_exit;
|
|
}
|
|
}
|
|
|
|
if (table->skip_alter_undo) {
|
|
if (trx->fts_trx == NULL) {
|
|
trx->fts_trx = fts_trx_create(trx);
|
|
}
|
|
|
|
fts_trx_table_t ftt;
|
|
ftt.table = table;
|
|
ftt.fts_trx = trx->fts_trx;
|
|
|
|
fts_add_doc_from_tuple(&ftt, doc_id, node->row);
|
|
|
|
} else {
|
|
/* Pass NULL for the columns affected, since an INSERT
|
|
affects all FTS indexes. */
|
|
fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL);
|
|
}
|
|
}
|
|
|
|
que_thr_stop_for_mysql_no_error(thr, trx);
|
|
|
|
srv_stats.n_rows_inserted.inc();
|
|
|
|
/* Not protected by dict_table_stats_lock() for performance
|
|
reasons, we would rather get garbage in stat_n_rows (which is
|
|
just an estimate anyway) than protecting the following code
|
|
with a latch. */
|
|
dict_table_n_rows_inc(table);
|
|
|
|
row_update_statistics_if_needed(table);
|
|
trx->op_info = "";
|
|
|
|
if (blob_heap != NULL) {
|
|
mem_heap_free(blob_heap);
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Does an insert for MySQL.
|
|
@param[in] mysql_rec row in the MySQL format
|
|
@param[in,out] prebuilt prebuilt struct in MySQL handle
|
|
@return error code or DB_SUCCESS*/
|
|
dberr_t row_insert_for_mysql(const byte *mysql_rec, row_prebuilt_t *prebuilt) {
|
|
/* For intrinsic tables there a lot of restrictions that can be
|
|
relaxed including locking of table, transaction handling, etc.
|
|
Use direct cursor interface for inserting to intrinsic tables. */
|
|
if (prebuilt->table->is_intrinsic()) {
|
|
return (row_insert_for_mysql_using_cursor(mysql_rec, prebuilt));
|
|
} else {
|
|
return (row_insert_for_mysql_using_ins_graph(mysql_rec, prebuilt));
|
|
}
|
|
}
|
|
|
|
/** Builds a select query graph in the prebuilt struct, unless one has been
built already.
@param[in,out] prebuilt prebuilt struct in MySQL handle */
void row_prebuild_sel_graph(row_prebuilt_t *prebuilt) {
  ut_ad(prebuilt && prebuilt->trx);

  if (prebuilt->sel_graph != nullptr) {
    /* Already built: nothing to do. */
    return;
  }

  sel_node_t *const node = sel_node_create(prebuilt->heap);

  que_thr_t *const graph_thr = pars_complete_graph_for_exec(
      node, prebuilt->trx, prebuilt->heap, prebuilt);

  prebuilt->sel_graph =
      static_cast<que_fork_t *>(que_node_get_parent(graph_thr));

  prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
}
|
|
|
|
/** Creates an query graph node of 'update' type to be used in the MySQL
|
|
interface.
|
|
@return own: update node */
|
|
upd_node_t *row_create_update_node_for_mysql(
|
|
dict_table_t *table, /*!< in: table to update */
|
|
mem_heap_t *heap) /*!< in: mem heap from which allocated */
|
|
{
|
|
upd_node_t *node;
|
|
|
|
DBUG_TRACE;
|
|
|
|
node = upd_node_create(heap);
|
|
|
|
node->in_mysql_interface = TRUE;
|
|
node->is_delete = FALSE;
|
|
node->searched_update = FALSE;
|
|
node->select = NULL;
|
|
node->pcur = btr_pcur_create_for_mysql();
|
|
|
|
DBUG_PRINT("info", ("node: %p, pcur: %p", node, node->pcur));
|
|
|
|
node->table = table;
|
|
|
|
node->update =
|
|
upd_create(table->get_n_cols() + dict_table_get_n_v_cols(table), heap);
|
|
|
|
node->update->table = table;
|
|
|
|
node->update_n_fields = table->get_n_cols();
|
|
|
|
UT_LIST_INIT(node->columns, &sym_node_t::col_var_list);
|
|
|
|
node->has_clust_rec_x_lock = TRUE;
|
|
node->cmpl_info = 0;
|
|
|
|
node->table_sym = NULL;
|
|
node->col_assign_list = NULL;
|
|
|
|
node->del_multi_val_pos = 0;
|
|
node->upd_multi_val_pos = 0;
|
|
|
|
return node;
|
|
}
|
|
|
|
/** Gets pointer to a prebuilt update vector used in updates. If the update
|
|
graph has not yet been built in the prebuilt struct, then this function
|
|
first builds it.
|
|
@return prebuilt update vector */
|
|
upd_t *row_get_prebuilt_update_vector(
|
|
row_prebuilt_t *prebuilt) /*!< in: prebuilt struct in MySQL
|
|
handle */
|
|
{
|
|
dict_table_t *table = prebuilt->table;
|
|
upd_node_t *node;
|
|
|
|
ut_ad(prebuilt && table && prebuilt->trx);
|
|
|
|
if (prebuilt->upd_node == NULL) {
|
|
/* Not called before for this handle: create an update node
|
|
and query graph to the prebuilt struct */
|
|
|
|
node = row_create_update_node_for_mysql(table, prebuilt->heap);
|
|
|
|
prebuilt->upd_node = node;
|
|
|
|
prebuilt->upd_graph = static_cast<que_fork_t *>(que_node_get_parent(
|
|
pars_complete_graph_for_exec(static_cast<upd_node_t *>(node),
|
|
prebuilt->trx, prebuilt->heap, prebuilt)));
|
|
|
|
prebuilt->upd_graph->state = QUE_FORK_ACTIVE;
|
|
}
|
|
|
|
return (prebuilt->upd_node->update);
|
|
}
|
|
|
|
/********************************************************************
|
|
Handle an update of a column that has an FTS index. */
|
|
static void row_fts_do_update(
|
|
trx_t *trx, /* in: transaction */
|
|
dict_table_t *table, /* in: Table with FTS index */
|
|
doc_id_t old_doc_id, /* in: old document id */
|
|
doc_id_t new_doc_id) /* in: new document id */
|
|
{
|
|
if (trx->fts_next_doc_id) {
|
|
fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
|
|
if (new_doc_id != FTS_NULL_DOC_ID) {
|
|
fts_trx_add_op(trx, table, new_doc_id, FTS_INSERT, NULL);
|
|
}
|
|
}
|
|
}
|
|
|
|
/************************************************************************
|
|
Handles FTS matters for an update or a delete.
|
|
NOTE: should not be called if the table does not have an FTS index. .*/
|
|
static dberr_t row_fts_update_or_delete(
|
|
row_prebuilt_t *prebuilt) /* in: prebuilt struct in MySQL
|
|
handle */
|
|
{
|
|
trx_t *trx = prebuilt->trx;
|
|
dict_table_t *table = prebuilt->table;
|
|
upd_node_t *node = prebuilt->upd_node;
|
|
doc_id_t old_doc_id = prebuilt->fts_doc_id;
|
|
|
|
DBUG_TRACE;
|
|
|
|
ut_a(dict_table_has_fts_index(prebuilt->table));
|
|
|
|
/* Deletes are simple; get them out of the way first. */
|
|
if (node->is_delete) {
|
|
/* A delete affects all FTS indexes, so we pass NULL */
|
|
fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
|
|
} else {
|
|
doc_id_t new_doc_id;
|
|
new_doc_id = fts_read_doc_id((byte *)&trx->fts_next_doc_id);
|
|
ut_ad(new_doc_id != UINT64_UNDEFINED);
|
|
if (new_doc_id == 0) {
|
|
ib::error(ER_IB_MSG_983) << "InnoDB FTS: Doc ID cannot be 0";
|
|
return (DB_FTS_INVALID_DOCID);
|
|
}
|
|
row_fts_do_update(trx, table, old_doc_id, new_doc_id);
|
|
}
|
|
|
|
return DB_SUCCESS;
|
|
}
|
|
|
|
/** Initialize the Doc ID system for FK table with FTS index */
|
|
static void init_fts_doc_id_for_ref(
|
|
dict_table_t *table, /*!< in: table */
|
|
ulint *depth) /*!< in: recusive call depth */
|
|
{
|
|
dict_foreign_t *foreign;
|
|
|
|
table->fk_max_recusive_level = 0;
|
|
|
|
(*depth)++;
|
|
|
|
/* Limit on tables involved in cascading delete/update */
|
|
if (*depth > FK_MAX_CASCADE_DEL) {
|
|
return;
|
|
}
|
|
|
|
/* Loop through this table's referenced list and also
|
|
recursively traverse each table's foreign table list */
|
|
for (dict_foreign_set::iterator it = table->referenced_set.begin();
|
|
it != table->referenced_set.end(); ++it) {
|
|
foreign = *it;
|
|
|
|
ut_ad(foreign->foreign_table != NULL);
|
|
|
|
if (foreign->foreign_table->fts != NULL) {
|
|
fts_init_doc_id(foreign->foreign_table);
|
|
}
|
|
|
|
if (!foreign->foreign_table->referenced_set.empty() &&
|
|
foreign->foreign_table != table) {
|
|
init_fts_doc_id_for_ref(foreign->foreign_table, depth);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* A functor for decrementing counters. */
|
|
class ib_dec_counter {
|
|
public:
|
|
ib_dec_counter() {}
|
|
|
|
void operator()(upd_node_t *node) {
|
|
ut_ad(node->table->n_foreign_key_checks_running > 0);
|
|
os_atomic_decrement_ulint(&node->table->n_foreign_key_checks_running, 1);
|
|
}
|
|
};
|
|
|
|
/** Do an in-place update in the intrinsic table. The update should not
|
|
modify any of the keys and it should not change the size of any fields.
|
|
@param[in] node the update node.
|
|
@return DB_SUCCESS on success, an error code on failure. */
|
|
static dberr_t row_update_inplace_for_intrinsic(const upd_node_t *node) {
|
|
mtr_t mtr;
|
|
dict_table_t *table = node->table;
|
|
mem_heap_t *heap = node->heap;
|
|
dtuple_t *entry = node->row;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint *offsets = offsets_;
|
|
|
|
ut_ad(table->is_intrinsic());
|
|
rec_offs_init(offsets_);
|
|
mtr_start(&mtr);
|
|
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
|
|
btr_pcur_t pcur;
|
|
|
|
dict_index_t *index = table->first_index();
|
|
|
|
entry = row_build_index_entry(node->row, node->ext, index, heap);
|
|
|
|
btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
|
|
|
|
rec_t *rec = btr_pcur_get_rec(&pcur);
|
|
|
|
ut_ad(!page_rec_is_infimum(rec));
|
|
ut_ad(!page_rec_is_supremum(rec));
|
|
|
|
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
|
|
|
|
ut_ad(!cmp_dtuple_rec(entry, rec, index, offsets));
|
|
ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(index->table)));
|
|
ut_ad(btr_pcur_get_block(&pcur)->made_dirty_with_no_latch);
|
|
|
|
bool size_changes =
|
|
row_upd_changes_field_size_or_external(index, offsets, node->update);
|
|
|
|
if (size_changes) {
|
|
mtr_commit(&mtr);
|
|
return (DB_FAIL);
|
|
}
|
|
|
|
row_upd_rec_in_place(rec, index, offsets, node->update, NULL);
|
|
mtr_commit(&mtr);
|
|
|
|
return (DB_SUCCESS);
|
|
}
|
|
|
|
/** Vector of persistent B-tree cursors, allocated through ut_allocator. */
typedef std::vector<btr_pcur_t, ut_allocator<btr_pcur_t>> cursors_t;
|
|
|
|
/** Delete row from table (corresponding entries from all the indexes).
|
|
Function will maintain cursor to the entries to invoke explicit rollback
|
|
just in case the update action following the delete fails.
|
|
|
|
@param[in] node update node carrying information to delete.
|
|
@param[out] delete_entries vector of cursor to deleted entries.
|
|
@param[in] restore_delete if true, then restore DELETE records by
|
|
unmarking delete.
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_delete_for_mysql_using_cursor(const upd_node_t *node,
|
|
cursors_t &delete_entries,
|
|
bool restore_delete) {
|
|
mtr_t mtr;
|
|
dict_table_t *table = node->table;
|
|
mem_heap_t *heap = node->heap;
|
|
dberr_t err = DB_SUCCESS;
|
|
dtuple_t *entry;
|
|
|
|
mtr_start(&mtr);
|
|
dict_disable_redo_if_temporary(table, &mtr);
|
|
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(table->indexes);
|
|
index != NULL && err == DB_SUCCESS && !restore_delete;
|
|
index = UT_LIST_GET_NEXT(indexes, index)) {
|
|
entry = row_build_index_entry(node->row, node->ext, index, heap);
|
|
|
|
btr_pcur_t pcur;
|
|
|
|
btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_LEAF, &pcur, &mtr);
|
|
|
|
#ifdef UNIV_DEBUG
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint *offsets = offsets_;
|
|
rec_offs_init(offsets_);
|
|
|
|
offsets = rec_get_offsets(btr_cur_get_rec(btr_pcur_get_btr_cur(&pcur)),
|
|
index, offsets, ULINT_UNDEFINED, &heap);
|
|
|
|
ut_ad(!cmp_dtuple_rec(entry, btr_cur_get_rec(btr_pcur_get_btr_cur(&pcur)),
|
|
index, offsets));
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
ut_ad(!rec_get_deleted_flag(btr_cur_get_rec(btr_pcur_get_btr_cur(&pcur)),
|
|
dict_table_is_comp(index->table)));
|
|
|
|
ut_ad(btr_pcur_get_block(&pcur)->made_dirty_with_no_latch);
|
|
|
|
if (page_rec_is_infimum(btr_pcur_get_rec(&pcur)) ||
|
|
page_rec_is_supremum(btr_pcur_get_rec(&pcur))) {
|
|
err = DB_ERROR;
|
|
} else {
|
|
btr_cur_t *btr_cur = btr_pcur_get_btr_cur(&pcur);
|
|
|
|
btr_rec_set_deleted_flag(
|
|
btr_cur_get_rec(btr_cur),
|
|
buf_block_get_page_zip(btr_cur_get_block(btr_cur)), TRUE);
|
|
|
|
/* Void call just to set mtr modification flag
|
|
to true failing which block is not scheduled for flush*/
|
|
byte *log_ptr = mlog_open(&mtr, 0);
|
|
ut_ad(log_ptr == NULL);
|
|
if (log_ptr != NULL) {
|
|
/* To keep complier happy. */
|
|
mlog_close(&mtr, log_ptr);
|
|
}
|
|
|
|
btr_pcur_store_position(&pcur, &mtr);
|
|
|
|
delete_entries.push_back(pcur);
|
|
}
|
|
}
|
|
|
|
if (err != DB_SUCCESS || restore_delete) {
|
|
/* Rollback half-way delete action that might have been
|
|
applied to few of the indexes. */
|
|
cursors_t::iterator end = delete_entries.end();
|
|
for (cursors_t::iterator it = delete_entries.begin(); it != end; ++it) {
|
|
ibool success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(*it), &mtr);
|
|
|
|
if (!success) {
|
|
ut_a(success);
|
|
} else {
|
|
btr_cur_t *btr_cur = btr_pcur_get_btr_cur(&(*it));
|
|
|
|
ut_ad(btr_cur_get_block(btr_cur)->made_dirty_with_no_latch);
|
|
|
|
btr_rec_set_deleted_flag(
|
|
btr_cur_get_rec(btr_cur),
|
|
buf_block_get_page_zip(btr_cur_get_block(btr_cur)), FALSE);
|
|
|
|
/* Void call just to set mtr modification flag
|
|
to true failing which block is not scheduled for
|
|
flush. */
|
|
byte *log_ptr = mlog_open(&mtr, 0);
|
|
ut_ad(log_ptr == NULL);
|
|
if (log_ptr != NULL) {
|
|
/* To keep complier happy. */
|
|
mlog_close(&mtr, log_ptr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Does an update of a row for MySQL by inserting new entry with update values.
|
|
@param[in] node update node carrying information to delete.
|
|
@param[out] delete_entries vector of cursor to deleted entries.
|
|
@param[in] thr thread handler
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_update_for_mysql_using_cursor(const upd_node_t *node,
|
|
cursors_t &delete_entries,
|
|
que_thr_t *thr) {
|
|
dberr_t err = DB_SUCCESS;
|
|
dict_table_t *table = node->table;
|
|
mem_heap_t *heap = node->heap;
|
|
dtuple_t *entry;
|
|
dfield_t *trx_id_field;
|
|
|
|
/* Step-1: Update row-id column if table doesn't have unique index. */
|
|
if (!dict_index_is_unique(table->first_index())) {
|
|
/* Update the row_id column. */
|
|
dfield_t *row_id_field;
|
|
|
|
row_id_field = dtuple_get_nth_field(node->upd_row, table->get_n_cols() - 2);
|
|
|
|
dict_sys_write_row_id(static_cast<byte *>(row_id_field->data),
|
|
dict_table_get_next_table_sess_row_id(node->table));
|
|
}
|
|
|
|
/* Step-2: Update the trx_id column. */
|
|
trx_id_field = dtuple_get_nth_field(node->upd_row, table->get_n_cols() - 1);
|
|
trx_write_trx_id(static_cast<byte *>(trx_id_field->data),
|
|
dict_table_get_next_table_sess_trx_id(node->table));
|
|
|
|
/* Step-3: Check if UPDATE can lead to DUPLICATE key violation.
|
|
If yes, then avoid executing it and return error. Only after ensuring
|
|
that UPDATE is safe execute it as we can't rollback. */
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(table->indexes);
|
|
index != NULL && err == DB_SUCCESS;
|
|
index = UT_LIST_GET_NEXT(indexes, index)) {
|
|
entry = row_build_index_entry(node->upd_row, node->upd_ext, index, heap);
|
|
|
|
if (index->is_clustered()) {
|
|
if (!dict_index_is_auto_gen_clust(index)) {
|
|
err = row_ins_clust_index_entry(
|
|
index, entry, thr, node->upd_ext ? node->upd_ext->n_ext : 0, true);
|
|
}
|
|
} else {
|
|
err = row_ins_sec_index_entry(index, entry, thr, true);
|
|
}
|
|
}
|
|
|
|
if (err != DB_SUCCESS) {
|
|
/* This suggest update can't be executed safely.
|
|
Avoid executing update. Rollback DELETE action. */
|
|
row_delete_for_mysql_using_cursor(node, delete_entries, true);
|
|
}
|
|
|
|
/* Step-4: It is now safe to execute update if there is no error */
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(table->indexes);
|
|
index != NULL && err == DB_SUCCESS;
|
|
index = UT_LIST_GET_NEXT(indexes, index)) {
|
|
entry = row_build_index_entry(node->upd_row, node->upd_ext, index, heap);
|
|
|
|
if (index->is_clustered()) {
|
|
err = row_ins_clust_index_entry(
|
|
index, entry, thr, node->upd_ext ? node->upd_ext->n_ext : 0, false);
|
|
/* Commit the open mtr as we are processing UPDATE. */
|
|
if (index->last_ins_cur) {
|
|
index->last_ins_cur->release();
|
|
}
|
|
} else {
|
|
err = row_ins_sec_index_entry(index, entry, thr, false);
|
|
}
|
|
|
|
/* Too big record is valid error and suggestion is to use
|
|
bigger page-size or different format. */
|
|
ut_ad(err == DB_SUCCESS || err == DB_TOO_BIG_RECORD ||
|
|
err == DB_OUT_OF_FILE_SPACE);
|
|
|
|
if (err == DB_TOO_BIG_RECORD) {
|
|
row_delete_for_mysql_using_cursor(node, delete_entries, true);
|
|
}
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Does an update or delete of a row for MySQL.
|
|
@param[in] mysql_rec row in the MySQL format
|
|
@param[in,out] prebuilt prebuilt struct in MySQL handle
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_del_upd_for_mysql_using_cursor(const byte *mysql_rec,
|
|
row_prebuilt_t *prebuilt) {
|
|
dberr_t err = DB_SUCCESS;
|
|
upd_node_t *node;
|
|
cursors_t delete_entries;
|
|
dict_index_t *clust_index;
|
|
que_thr_t *thr = NULL;
|
|
|
|
/* Step-0: If there is cached insert position commit it before
|
|
starting delete/update action as this can result in btree structure
|
|
to change. */
|
|
thr = que_fork_get_first_thr(prebuilt->upd_graph);
|
|
clust_index = prebuilt->table->first_index();
|
|
|
|
if (clust_index->last_ins_cur) {
|
|
clust_index->last_ins_cur->release();
|
|
}
|
|
|
|
/* Step-1: Select the appropriate cursor that will help build
|
|
the original row and updated row. */
|
|
node = prebuilt->upd_node;
|
|
if (prebuilt->pcur->m_btr_cur.index == clust_index) {
|
|
btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur);
|
|
} else {
|
|
btr_pcur_copy_stored_position(node->pcur, prebuilt->clust_pcur);
|
|
}
|
|
|
|
ut_ad(prebuilt->table->is_intrinsic());
|
|
ut_ad(!prebuilt->table->n_v_cols);
|
|
|
|
/* Internal table is created by optimiser. So there
|
|
should not be any virtual columns. */
|
|
row_upd_store_row(prebuilt->trx, node, NULL, NULL);
|
|
|
|
if (!node->is_delete) {
|
|
/* UPDATE operation */
|
|
|
|
bool key_changed = false;
|
|
|
|
dict_table_t *table = prebuilt->table;
|
|
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(table->indexes); index != NULL;
|
|
index = UT_LIST_GET_NEXT(indexes, index)) {
|
|
key_changed = row_upd_changes_ord_field_binary(
|
|
index, node->update, thr, node->upd_row, node->upd_ext, nullptr);
|
|
|
|
if (key_changed) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!key_changed) {
|
|
err = row_update_inplace_for_intrinsic(node);
|
|
|
|
if (err == DB_SUCCESS) {
|
|
return (err);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Step-2: Execute DELETE operation. */
|
|
err = row_delete_for_mysql_using_cursor(node, delete_entries, false);
|
|
|
|
/* Step-3: If only DELETE operation then exit immediately. */
|
|
if (node->is_delete) {
|
|
if (err == DB_SUCCESS) {
|
|
dict_table_n_rows_dec(prebuilt->table);
|
|
srv_stats.n_rows_deleted.inc();
|
|
}
|
|
}
|
|
|
|
if (err == DB_SUCCESS && !node->is_delete) {
|
|
/* Step-4: Complete UPDATE operation by inserting new row with
|
|
updated data. */
|
|
err = row_update_for_mysql_using_cursor(node, delete_entries, thr);
|
|
|
|
if (err == DB_SUCCESS) {
|
|
srv_stats.n_rows_updated.inc();
|
|
}
|
|
}
|
|
|
|
thr_get_trx(thr)->error_state = DB_SUCCESS;
|
|
cursors_t::iterator end = delete_entries.end();
|
|
for (cursors_t::iterator it = delete_entries.begin(); it != end; ++it) {
|
|
btr_pcur_close(&(*it));
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Does an update or delete of a row for MySQL.
|
|
@param[in] mysql_rec row in the MySQL format
|
|
@param[in,out] prebuilt prebuilt struct in MySQL handle
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_update_for_mysql_using_upd_graph(const byte *mysql_rec,
|
|
row_prebuilt_t *prebuilt) {
|
|
trx_savept_t savept;
|
|
dberr_t err;
|
|
que_thr_t *thr;
|
|
ibool was_lock_wait;
|
|
dict_index_t *clust_index;
|
|
upd_node_t *node;
|
|
dict_table_t *table = prebuilt->table;
|
|
trx_t *trx = prebuilt->trx;
|
|
ulint fk_depth = 0;
|
|
bool got_s_lock = false;
|
|
|
|
DBUG_TRACE;
|
|
|
|
ut_ad(trx);
|
|
ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
|
|
ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
|
|
UT_NOT_USED(mysql_rec);
|
|
|
|
assert_lizard_dict_table_check(table);
|
|
|
|
if (prebuilt->table->ibd_file_missing) {
|
|
ib::error(ER_IB_MSG_984)
|
|
<< "MySQL is trying to use a table handle but the"
|
|
" .ibd file for table "
|
|
<< prebuilt->table->name
|
|
<< " does not exist. Have you deleted"
|
|
" the .ibd file from the database directory under"
|
|
" the MySQL datadir, or have you used DISCARD"
|
|
" TABLESPACE? "
|
|
<< TROUBLESHOOTING_MSG;
|
|
return DB_ERROR;
|
|
}
|
|
|
|
/* Allow to modify hardcoded DD tables in some scenario to
|
|
make DDL work */
|
|
if (srv_force_recovery > 0 &&
|
|
!(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN &&
|
|
dict_sys_t::is_dd_table_id(prebuilt->table->id))) {
|
|
ib::error(ER_IB_MSG_985) << MODIFICATIONS_NOT_ALLOWED_MSG_FORCE_RECOVERY;
|
|
return DB_READ_ONLY;
|
|
}
|
|
|
|
DEBUG_SYNC_C("innodb_row_update_for_mysql_begin");
|
|
|
|
trx->op_info = "updating or deleting";
|
|
|
|
row_mysql_delay_if_needed();
|
|
|
|
init_fts_doc_id_for_ref(table, &fk_depth);
|
|
|
|
trx_start_if_not_started_xa(trx, true);
|
|
|
|
if (dict_table_is_referenced_by_foreign_key(table)) {
|
|
/*TODO: use foreign key MDL to protect foreign
|
|
key tables(wl#6049) */
|
|
init_fts_doc_id_for_ref(table, &fk_depth);
|
|
}
|
|
|
|
node = prebuilt->upd_node;
|
|
node->del_multi_val_pos = 0;
|
|
node->upd_multi_val_pos = 0;
|
|
|
|
clust_index = table->first_index();
|
|
|
|
if (prebuilt->pcur->m_btr_cur.index == clust_index) {
|
|
btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur);
|
|
} else {
|
|
btr_pcur_copy_stored_position(node->pcur, prebuilt->clust_pcur);
|
|
}
|
|
|
|
ut_a(node->pcur->m_rel_pos == BTR_PCUR_ON);
|
|
|
|
/* MySQL seems to call rnd_pos before updating each row it
|
|
has cached: we can get the correct cursor position from
|
|
prebuilt->pcur; NOTE that we cannot build the row reference
|
|
from mysql_rec if the clustered index was automatically
|
|
generated for the table: MySQL does not know anything about
|
|
the row id used as the clustered index key */
|
|
|
|
savept = trx_savept_take(trx);
|
|
|
|
thr = que_fork_get_first_thr(prebuilt->upd_graph);
|
|
|
|
node->state = UPD_NODE_UPDATE_CLUSTERED;
|
|
|
|
ut_ad(!prebuilt->sql_stat_start);
|
|
|
|
que_thr_move_to_run_state_for_mysql(thr, trx);
|
|
|
|
run_again:
|
|
|
|
thr->run_node = node;
|
|
thr->prev_node = node;
|
|
thr->fk_cascade_depth = 0;
|
|
row_upd_step(thr);
|
|
|
|
err = trx->error_state;
|
|
|
|
if (err != DB_SUCCESS) {
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
if (err == DB_RECORD_NOT_FOUND) {
|
|
trx->error_state = DB_SUCCESS;
|
|
trx->op_info = "";
|
|
|
|
if (thr->fk_cascade_depth > 0) {
|
|
que_graph_free_recursive(node);
|
|
}
|
|
goto error;
|
|
}
|
|
|
|
thr->lock_state = QUE_THR_LOCK_ROW;
|
|
|
|
DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error");
|
|
|
|
was_lock_wait = row_mysql_handle_errors(&err, trx, thr, &savept);
|
|
thr->lock_state = QUE_THR_LOCK_NOLOCK;
|
|
|
|
if (was_lock_wait) {
|
|
goto run_again;
|
|
}
|
|
|
|
trx->op_info = "";
|
|
goto error;
|
|
}
|
|
|
|
que_thr_stop_for_mysql_no_error(thr, trx);
|
|
|
|
if (dict_table_has_fts_index(table) &&
|
|
trx->fts_next_doc_id != UINT64_UNDEFINED) {
|
|
err = row_fts_update_or_delete(prebuilt);
|
|
ut_ad(err == DB_SUCCESS);
|
|
if (err != DB_SUCCESS) {
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
/* Completed cascading operations (if any) */
|
|
if (got_s_lock) {
|
|
row_mysql_unfreeze_data_dictionary(trx);
|
|
}
|
|
|
|
if (node->is_delete) {
|
|
/* Not protected by dict_table_stats_lock() for performance
|
|
reasons, we would rather get garbage in stat_n_rows (which is
|
|
just an estimate anyway) than protecting the following code
|
|
with a latch. */
|
|
dict_table_n_rows_dec(prebuilt->table);
|
|
|
|
srv_stats.n_rows_deleted.inc();
|
|
} else {
|
|
srv_stats.n_rows_updated.inc();
|
|
}
|
|
|
|
/* We update table statistics only if it is a DELETE or UPDATE
|
|
that changes indexed columns, UPDATEs that change only non-indexed
|
|
columns would not affect statistics. */
|
|
if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
|
|
row_update_statistics_if_needed(prebuilt->table);
|
|
}
|
|
|
|
trx->op_info = "";
|
|
|
|
return err;
|
|
|
|
error:
|
|
if (got_s_lock) {
|
|
row_mysql_unfreeze_data_dictionary(trx);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/** Does an update or delete of a row for MySQL.
|
|
@param[in] mysql_rec row in the MySQL format
|
|
@param[in,out] prebuilt prebuilt struct in MySQL handle
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_update_for_mysql(const byte *mysql_rec, row_prebuilt_t *prebuilt) {
|
|
if (prebuilt->table->is_intrinsic()) {
|
|
return (row_del_upd_for_mysql_using_cursor(mysql_rec, prebuilt));
|
|
} else {
|
|
ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
|
|
return (row_update_for_mysql_using_upd_graph(mysql_rec, prebuilt));
|
|
}
|
|
}
|
|
|
|
/** Delete all rows for the given table by freeing/truncating indexes.
|
|
@param[in,out] table table handler */
|
|
void row_delete_all_rows(dict_table_t *table) {
|
|
ut_ad(table->is_temporary());
|
|
dict_index_t *index;
|
|
|
|
index = table->first_index();
|
|
/* Step-0: If there is cached insert position along with mtr
|
|
commit it before starting delete/update action. */
|
|
if (index->last_ins_cur) {
|
|
index->last_ins_cur->release();
|
|
}
|
|
|
|
bool found;
|
|
const page_size_t page_size(fil_space_get_page_size(table->space, &found));
|
|
ut_a(found);
|
|
|
|
/* Step-1: Now truncate all the indexes and re-create them.
|
|
Note: This is ddl action even though delete all rows is
|
|
DML action. Any error during this action is ir-reversible. */
|
|
for (index = UT_LIST_GET_FIRST(table->indexes); index != NULL;
|
|
index = UT_LIST_GET_NEXT(indexes, index)) {
|
|
ut_ad(index->space == table->space);
|
|
const page_id_t root(index->space, index->page);
|
|
btr_free(root, page_size);
|
|
|
|
mtr_t mtr;
|
|
|
|
mtr.start();
|
|
mtr.set_log_mode(MTR_LOG_NO_REDO);
|
|
index->page = btr_create(index->type, index->space, page_size, index->id,
|
|
index, &mtr);
|
|
ut_ad(index->page != FIL_NULL);
|
|
mtr.commit();
|
|
}
|
|
}
|
|
|
|
/** This can only be used when this session is using a READ COMMITTED or READ
|
|
UNCOMMITTED isolation level. Before calling this function
|
|
row_search_for_mysql() must have initialized prebuilt->new_rec_locks to store
|
|
the information which new record locks really were set. This function removes
|
|
a newly set clustered index record lock under prebuilt->pcur or
|
|
prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that releases
|
|
the latest clustered index record lock we set.
|
|
|
|
@param[in,out] prebuilt prebuilt struct in MySQL handle
|
|
@param[in] has_latches_on_recs TRUE if called so that we have the
|
|
latches on the records under pcur
|
|
and clust_pcur, and we do not need
|
|
to reposition the cursors. */
|
|
void row_unlock_for_mysql(row_prebuilt_t *prebuilt, ibool has_latches_on_recs) {
|
|
btr_pcur_t *pcur = prebuilt->pcur;
|
|
btr_pcur_t *clust_pcur = prebuilt->clust_pcur;
|
|
trx_t *trx = prebuilt->trx;
|
|
|
|
ut_ad(prebuilt != NULL);
|
|
ut_ad(trx != NULL);
|
|
ut_ad(trx->allow_semi_consistent());
|
|
|
|
if (dict_index_is_spatial(prebuilt->index)) {
|
|
return;
|
|
}
|
|
|
|
trx->op_info = "unlock_row";
|
|
|
|
if (std::count(prebuilt->new_rec_lock,
|
|
prebuilt->new_rec_lock + row_prebuilt_t::LOCK_COUNT, true)) {
|
|
const rec_t *rec;
|
|
dict_index_t *index;
|
|
trx_id_t rec_trx_id;
|
|
mtr_t mtr;
|
|
|
|
mtr_start(&mtr);
|
|
|
|
/* Restore the cursor position and find the record */
|
|
|
|
if (!has_latches_on_recs) {
|
|
btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr);
|
|
}
|
|
|
|
rec = btr_pcur_get_rec(pcur);
|
|
index = btr_pcur_get_btr_cur(pcur)->index;
|
|
|
|
if (prebuilt->new_rec_lock[row_prebuilt_t::LOCK_CLUST_PCUR]) {
|
|
/* Restore the cursor position and find the record
|
|
in the clustered index. */
|
|
|
|
if (!has_latches_on_recs) {
|
|
btr_pcur_restore_position(BTR_SEARCH_LEAF, clust_pcur, &mtr);
|
|
}
|
|
|
|
rec = btr_pcur_get_rec(clust_pcur);
|
|
index = btr_pcur_get_btr_cur(clust_pcur)->index;
|
|
}
|
|
|
|
if (!index->is_clustered()) {
|
|
/* This is not a clustered index record. We
|
|
do not know how to unlock the record. */
|
|
goto no_unlock;
|
|
}
|
|
|
|
/* If the record has been modified by this
|
|
transaction, do not unlock it. */
|
|
|
|
if (index->trx_id_offset) {
|
|
rec_trx_id = trx_read_trx_id(rec + index->trx_id_offset);
|
|
} else {
|
|
mem_heap_t *heap = NULL;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint *offsets = offsets_;
|
|
|
|
rec_offs_init(offsets_);
|
|
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
|
|
|
|
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
|
|
|
|
if (UNIV_LIKELY_NULL(heap)) {
|
|
mem_heap_free(heap);
|
|
}
|
|
}
|
|
|
|
if (rec_trx_id != trx->id) {
|
|
/* We did not update the record: unlock it */
|
|
|
|
if (prebuilt->new_rec_lock[row_prebuilt_t::LOCK_PCUR]) {
|
|
rec = btr_pcur_get_rec(pcur);
|
|
|
|
lock_rec_unlock(
|
|
trx, btr_pcur_get_block(pcur), rec,
|
|
static_cast<enum lock_mode>(prebuilt->select_lock_type));
|
|
}
|
|
|
|
if (prebuilt->new_rec_lock[row_prebuilt_t::LOCK_CLUST_PCUR]) {
|
|
rec = btr_pcur_get_rec(clust_pcur);
|
|
|
|
lock_rec_unlock(
|
|
trx, btr_pcur_get_block(clust_pcur), rec,
|
|
static_cast<enum lock_mode>(prebuilt->select_lock_type));
|
|
}
|
|
}
|
|
no_unlock:
|
|
mtr_commit(&mtr);
|
|
}
|
|
|
|
trx->op_info = "";
|
|
}
|
|
|
|
/** Does a cascaded delete or set null in a foreign key operation.
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_update_cascade_for_mysql(
|
|
que_thr_t *thr, /*!< in: query thread */
|
|
upd_node_t *node, /*!< in: update node used in the cascade
|
|
or set null operation */
|
|
dict_table_t *table) /*!< in: table where we do the operation */
|
|
{
|
|
dberr_t err;
|
|
trx_t *trx;
|
|
|
|
trx = thr_get_trx(thr);
|
|
|
|
/* Increment fk_cascade_depth to record the recursive call depth on
|
|
a single update/delete that affects multiple tables chained
|
|
together with foreign key relations. */
|
|
thr->fk_cascade_depth++;
|
|
|
|
if (thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
|
|
return (DB_FOREIGN_EXCEED_MAX_CASCADE);
|
|
}
|
|
run_again:
|
|
thr->run_node = node;
|
|
thr->prev_node = node;
|
|
|
|
DEBUG_SYNC_C("foreign_constraint_update_cascade");
|
|
TABLE *temp = thr->prebuilt->m_mysql_table;
|
|
thr->prebuilt->m_mysql_table = NULL;
|
|
row_upd_step(thr);
|
|
thr->prebuilt->m_mysql_table = temp;
|
|
/* The recursive call for cascading update/delete happens
|
|
in above row_upd_step(), reset the counter once we come
|
|
out of the recursive call, so it does not accumulate for
|
|
different row deletes */
|
|
thr->fk_cascade_depth = 0;
|
|
|
|
err = trx->error_state;
|
|
|
|
/* Note that the cascade node is a subnode of another InnoDB
|
|
query graph node. We do a normal lock wait in this node, but
|
|
all errors are handled by the parent node. */
|
|
|
|
if (err == DB_LOCK_WAIT) {
|
|
/* Handle lock wait here */
|
|
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
lock_wait_suspend_thread(thr);
|
|
|
|
/* Note that a lock wait may also end in a lock wait timeout,
|
|
or this transaction is picked as a victim in selective
|
|
deadlock resolution */
|
|
|
|
if (trx->error_state != DB_SUCCESS) {
|
|
return (trx->error_state);
|
|
}
|
|
|
|
/* Retry operation after a normal lock wait */
|
|
|
|
goto run_again;
|
|
}
|
|
|
|
if (err != DB_SUCCESS) {
|
|
return (err);
|
|
}
|
|
|
|
if (node->is_delete) {
|
|
/* Not protected by dict_table_stats_lock() for performance
|
|
reasons, we would rather get garbage in stat_n_rows (which is
|
|
just an estimate anyway) than protecting the following code
|
|
with a latch. */
|
|
dict_table_n_rows_dec(table);
|
|
|
|
srv_stats.n_rows_deleted.add((size_t)trx->id, 1);
|
|
} else {
|
|
srv_stats.n_rows_updated.add((size_t)trx->id, 1);
|
|
}
|
|
|
|
row_update_statistics_if_needed(table);
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Checks if a table is such that we automatically created a clustered
|
|
index on it (on row id).
|
|
@return true if the clustered index was generated automatically */
|
|
ibool row_table_got_default_clust_index(
|
|
const dict_table_t *table) /*!< in: table */
|
|
{
|
|
const dict_index_t *clust_index;
|
|
|
|
clust_index = table->first_index();
|
|
|
|
return (clust_index->get_col(0)->mtype == DATA_SYS);
|
|
}
|
|
|
|
/** Locks the data dictionary in shared mode from modifications, for performing
|
|
foreign key check, rollback, or other operation invisible to MySQL. */
|
|
void row_mysql_freeze_data_dictionary_func(
|
|
trx_t *trx, /*!< in/out: transaction */
|
|
const char *file, /*!< in: file name */
|
|
ulint line) /*!< in: line number */
|
|
{
|
|
ut_a(trx->dict_operation_lock_mode == 0);
|
|
|
|
rw_lock_s_lock_inline(dict_operation_lock, 0, file, line);
|
|
|
|
trx->dict_operation_lock_mode = RW_S_LATCH;
|
|
}
|
|
|
|
/** Unlocks the data dictionary shared lock. */
|
|
void row_mysql_unfreeze_data_dictionary(trx_t *trx) /*!< in/out: transaction */
|
|
{
|
|
ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
|
|
|
|
rw_lock_s_unlock(dict_operation_lock);
|
|
|
|
trx->dict_operation_lock_mode = 0;
|
|
}
|
|
|
|
/** Locks the data dictionary exclusively for performing a table create or other
|
|
data dictionary modification operation. */
|
|
void row_mysql_lock_data_dictionary_func(trx_t *trx, /*!< in/out: transaction */
|
|
const char *file, /*!< in: file name */
|
|
ulint line) /*!< in: line number */
|
|
{
|
|
ut_a(trx->dict_operation_lock_mode == 0 ||
|
|
trx->dict_operation_lock_mode == RW_X_LATCH);
|
|
|
|
/* Serialize data dictionary operations with dictionary mutex:
|
|
no deadlocks or lock waits can occur then in these operations */
|
|
|
|
rw_lock_x_lock_inline(dict_operation_lock, 0, file, line);
|
|
trx->dict_operation_lock_mode = RW_X_LATCH;
|
|
|
|
mutex_enter(&dict_sys->mutex);
|
|
}
|
|
|
|
/** Unlocks the data dictionary exclusive lock. */
|
|
void row_mysql_unlock_data_dictionary(trx_t *trx) /*!< in/out: transaction */
|
|
{
|
|
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
|
|
|
|
/* Serialize data dictionary operations with dictionary mutex:
|
|
no deadlocks can occur then in these operations */
|
|
|
|
mutex_exit(&dict_sys->mutex);
|
|
rw_lock_x_unlock(dict_operation_lock);
|
|
|
|
trx->dict_operation_lock_mode = 0;
|
|
}
|
|
|
|
/** Creates a table for MySQL. On success the in-memory table could be
|
|
kept in non-LRU list while on failure the 'table' object will be freed.
|
|
@param[in] table table definition(will be freed, or on
|
|
DB_SUCCESS added to the data dictionary cache)
|
|
@param[in] compression compression algorithm to use, can be nullptr
|
|
@param[in,out] trx transaction
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_create_table_for_mysql(dict_table_t *table, const char *compression,
|
|
trx_t *trx) {
|
|
mem_heap_t *heap;
|
|
dberr_t err;
|
|
|
|
ut_ad(!mutex_own(&dict_sys->mutex));
|
|
|
|
DBUG_EXECUTE_IF("ib_create_table_fail_at_start_of_row_create_table_for_mysql",
|
|
{
|
|
dict_mem_table_free(table);
|
|
|
|
trx->op_info = "";
|
|
|
|
return (DB_ERROR);
|
|
});
|
|
|
|
trx->op_info = "creating table";
|
|
|
|
switch (trx_get_dict_operation(trx)) {
|
|
case TRX_DICT_OP_NONE:
|
|
case TRX_DICT_OP_TABLE:
|
|
break;
|
|
case TRX_DICT_OP_INDEX:
|
|
/* If the transaction was previously flagged as
|
|
TRX_DICT_OP_INDEX, we should be creating auxiliary
|
|
tables for full-text indexes. */
|
|
ut_ad(strstr(table->name.m_name, "/fts_") != NULL);
|
|
}
|
|
|
|
/* Assign table id and build table space. */
|
|
err = dict_build_table_def(table, trx);
|
|
if (err != DB_SUCCESS) {
|
|
trx->error_state = err;
|
|
goto error_handling;
|
|
}
|
|
|
|
if (err == DB_SUCCESS) {
|
|
heap = mem_heap_create(512);
|
|
|
|
dict_table_add_system_columns(table, heap);
|
|
|
|
mutex_enter(&dict_sys->mutex);
|
|
dict_table_add_to_cache(table, false, heap);
|
|
mutex_exit(&dict_sys->mutex);
|
|
|
|
/* During upgrade, etc., the log_ddl may haven't been
|
|
initialized and we don't need to write DDL logs too.
|
|
This can only happen for CREATE TABLE. */
|
|
if (log_ddl != nullptr) {
|
|
err = log_ddl->write_remove_cache_log(trx, table);
|
|
}
|
|
|
|
mem_heap_free(heap);
|
|
}
|
|
|
|
if (err == DB_SUCCESS && dict_table_is_file_per_table(table)) {
|
|
ut_ad(dict_table_is_file_per_table(table));
|
|
|
|
if (err == DB_SUCCESS && compression != NULL && compression[0] != '\0') {
|
|
ut_ad(!dict_table_in_shared_tablespace(table));
|
|
|
|
ut_ad(Compression::validate(compression) == DB_SUCCESS);
|
|
|
|
err = fil_set_compression(table, compression);
|
|
|
|
switch (err) {
|
|
case DB_SUCCESS:
|
|
break;
|
|
case DB_NOT_FOUND:
|
|
case DB_UNSUPPORTED:
|
|
case DB_IO_NO_PUNCH_HOLE_FS:
|
|
/* Return these errors */
|
|
break;
|
|
case DB_IO_NO_PUNCH_HOLE_TABLESPACE:
|
|
/* Page Compression will not be used. */
|
|
err = DB_SUCCESS;
|
|
break;
|
|
default:
|
|
ut_error;
|
|
}
|
|
|
|
/* We can check for file system punch hole support
|
|
only after creating the tablespace. On Windows
|
|
we can query that information but not on Linux. */
|
|
ut_ad(err == DB_SUCCESS || err == DB_IO_NO_PUNCH_HOLE_FS);
|
|
|
|
/* In non-strict mode we ignore dodgy compression
|
|
settings. */
|
|
}
|
|
}
|
|
error_handling:
|
|
switch (err) {
|
|
case DB_SUCCESS:
|
|
case DB_IO_NO_PUNCH_HOLE_FS:
|
|
break;
|
|
|
|
case DB_OUT_OF_FILE_SPACE:
|
|
case DB_TOO_MANY_CONCURRENT_TRXS:
|
|
|
|
if (err == DB_OUT_OF_FILE_SPACE) {
|
|
ib::warn(ER_IB_MSG_986) << "Cannot create table " << table->name
|
|
<< " because the tablespace is full";
|
|
}
|
|
|
|
trx->error_state = DB_SUCCESS;
|
|
|
|
/* Still do it here so that the table can always be freed */
|
|
if (dd_table_open_on_name_in_mem(table->name.m_name, false)) {
|
|
mutex_enter(&dict_sys->mutex);
|
|
|
|
dd_table_close(table, nullptr, nullptr, true);
|
|
|
|
dict_table_remove_from_cache(table);
|
|
|
|
mutex_exit(&dict_sys->mutex);
|
|
} else {
|
|
dict_mem_table_free(table);
|
|
}
|
|
|
|
break;
|
|
|
|
case DB_UNSUPPORTED:
|
|
case DB_DUPLICATE_KEY:
|
|
case DB_TABLESPACE_EXISTS:
|
|
default:
|
|
trx->error_state = DB_SUCCESS;
|
|
dict_mem_table_free(table);
|
|
break;
|
|
}
|
|
|
|
trx->op_info = "";
|
|
trx->dict_operation = TRX_DICT_OP_NONE;
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Does an index creation operation for MySQL. TODO: currently failure
|
|
to create an index results in dropping the whole table! This is no problem
|
|
currently as all indexes must be created at the same time as the table.
|
|
@return error number or DB_SUCCESS */
|
|
dberr_t row_create_index_for_mysql(
|
|
dict_index_t *index, /*!< in, own: index definition
|
|
(will be freed) */
|
|
trx_t *trx, /*!< in: transaction handle */
|
|
const ulint *field_lengths, /*!< in: if not NULL, must contain
|
|
dict_index_get_n_fields(index)
|
|
actual field lengths for the
|
|
index columns, which are
|
|
then checked for not being too
|
|
large. */
|
|
dict_table_t *handler) /*!< in/out: table handler. */
|
|
{
|
|
dberr_t err;
|
|
ulint i;
|
|
ulint len;
|
|
char *table_name;
|
|
char *index_name;
|
|
dict_table_t *table = NULL;
|
|
ibool is_fts;
|
|
THD *thd = current_thd;
|
|
|
|
trx->op_info = "creating index";
|
|
|
|
/* Copy the table name because we may want to drop the
|
|
table later, after the index object is freed (inside
|
|
que_run_threads()) and thus index->table_name is not available. */
|
|
table_name = mem_strdup(index->table_name);
|
|
index_name = mem_strdup(index->name);
|
|
|
|
is_fts = (index->type == DICT_FTS);
|
|
|
|
if (handler != NULL && handler->is_intrinsic()) {
|
|
table = handler;
|
|
}
|
|
|
|
if (table == NULL) {
|
|
table = dd_table_open_on_name(thd, NULL, table_name, false,
|
|
DICT_ERR_IGNORE_NONE);
|
|
} else {
|
|
table->acquire();
|
|
ut_ad(table->is_intrinsic());
|
|
}
|
|
|
|
for (i = 0; i < index->n_def; i++) {
|
|
/* Check that prefix_len and actual length
|
|
< DICT_MAX_INDEX_COL_LEN */
|
|
|
|
len = index->get_field(i)->prefix_len;
|
|
|
|
if (field_lengths && field_lengths[i]) {
|
|
len = ut_max(len, field_lengths[i]);
|
|
}
|
|
|
|
DBUG_EXECUTE_IF("ib_create_table_fail_at_create_index",
|
|
len = DICT_MAX_FIELD_LEN_BY_FORMAT(table) + 1;);
|
|
|
|
/* Column or prefix length exceeds maximum column length */
|
|
if (len > (ulint)DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
|
|
err = DB_TOO_BIG_INDEX_COL;
|
|
|
|
dict_mem_index_free(index);
|
|
goto error_handling;
|
|
}
|
|
}
|
|
|
|
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
|
|
|
|
/* For temp-table we avoid insertion into SYSTEM TABLES to
|
|
maintain performance and so we have separate path that directly
|
|
just updates dictonary cache. */
|
|
if (!table->is_temporary()) {
|
|
/* Create B-tree */
|
|
dict_build_index_def(table, index, trx);
|
|
|
|
err = dict_index_add_to_cache_w_vcol(table, index, NULL, FIL_NULL,
|
|
trx_is_strict(trx));
|
|
|
|
if (err != DB_SUCCESS) {
|
|
goto error_handling;
|
|
}
|
|
|
|
index = UT_LIST_GET_LAST(table->indexes);
|
|
|
|
err = dict_create_index_tree_in_mem(index, trx);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
goto error_handling;
|
|
}
|
|
|
|
} else {
|
|
dict_build_index_def(table, index, trx);
|
|
#ifdef UNIV_DEBUG
|
|
space_index_t index_id = index->id;
|
|
#endif
|
|
|
|
/* add index to dictionary cache and also free index object.
|
|
We allow intrinsic table to violate the size limits because
|
|
they are used by optimizer for all record formats. */
|
|
err = dict_index_add_to_cache(table, index, FIL_NULL,
|
|
!table->is_intrinsic() && trx_is_strict(trx));
|
|
|
|
if (err != DB_SUCCESS) {
|
|
goto error_handling;
|
|
}
|
|
|
|
index = UT_LIST_GET_LAST(table->indexes);
|
|
ut_ad(index->id == index_id);
|
|
|
|
/* as above function has freed index object re-load it
|
|
now from dictionary cache using index_id */
|
|
if (table->is_intrinsic()) {
|
|
/* trx_id field is used for tracking which transaction
|
|
created the index. For intrinsic table this is
|
|
irrelevant and so re-use it for tracking consistent
|
|
view while processing SELECT as part of UPDATE. */
|
|
index->trx_id = ULINT_UNDEFINED;
|
|
}
|
|
ut_a(index != NULL);
|
|
index->table = table;
|
|
|
|
err = dict_create_index_tree_in_mem(index, trx);
|
|
|
|
if (err != DB_SUCCESS && !table->is_intrinsic()) {
|
|
mutex_enter(&dict_sys->mutex);
|
|
dict_index_remove_from_cache(table, index);
|
|
mutex_exit(&dict_sys->mutex);
|
|
}
|
|
}
|
|
|
|
/* Create the index specific FTS auxiliary tables. */
|
|
if (err == DB_SUCCESS && is_fts) {
|
|
dict_index_t *idx;
|
|
|
|
idx = dict_table_get_index_on_name(table, index_name);
|
|
|
|
ut_ad(idx);
|
|
err = fts_create_index_tables_low(trx, idx, table->name.m_name, table->id);
|
|
}
|
|
|
|
error_handling:
|
|
dd_table_close(table, thd, NULL, false);
|
|
|
|
trx->op_info = "";
|
|
trx->dict_operation = TRX_DICT_OP_NONE;
|
|
|
|
ut_free(table_name);
|
|
ut_free(index_name);
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Scans a table create SQL string and adds to the data dictionary
|
|
the foreign key constraints declared in the string. This function
|
|
should be called after the indexes for a table have been created.
|
|
Each foreign key constraint must be accompanied with indexes in
|
|
bot participating tables. The indexes are allowed to contain more
|
|
fields than mentioned in the constraint.
|
|
|
|
@param[in] trx transaction
|
|
@param[in] sql_string table create statement where
|
|
foreign keys are declared like:
|
|
FOREIGN KEY (a, b) REFERENCES table2(c, d),
|
|
table2 can be written also with the database
|
|
name before it: test.table2; the default
|
|
database id the database of parameter name
|
|
@param[in] sql_length length of sql_string
|
|
@param[in] name table full name in normalized form
|
|
@param[in] reject_fks if TRUE, fail with error code
|
|
DB_CANNOT_ADD_CONSTRAINT if any
|
|
foreign keys are found.
|
|
@param[in] dd_table MySQL dd::Table for the table
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_table_add_foreign_constraints(trx_t *trx, const char *sql_string,
|
|
size_t sql_length, const char *name,
|
|
ibool reject_fks,
|
|
const dd::Table *dd_table) {
|
|
dberr_t err;
|
|
|
|
DBUG_TRACE;
|
|
|
|
ut_ad(mutex_own(&dict_sys->mutex));
|
|
ut_a(sql_string);
|
|
|
|
trx->op_info = "adding foreign keys";
|
|
|
|
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
|
|
|
|
err = dict_create_foreign_constraints(trx, sql_string, sql_length, name,
|
|
reject_fks);
|
|
|
|
DBUG_EXECUTE_IF("ib_table_add_foreign_fail", err = DB_DUPLICATE_KEY;);
|
|
|
|
DEBUG_SYNC_C("table_add_foreign_constraints");
|
|
/* Check like this shouldn't be done for table that doesn't
|
|
have foreign keys but code still continues to run with void action.
|
|
Disable it for intrinsic table at-least */
|
|
if (err == DB_SUCCESS) {
|
|
/* Check that also referencing constraints are ok */
|
|
dict_names_t fk_tables;
|
|
THD *thd = trx->mysql_thd;
|
|
|
|
dd::cache::Dictionary_client *client = dd::get_dd_client(thd);
|
|
dd::cache::Dictionary_client::Auto_releaser releaser(client);
|
|
dict_table_t *table = dd_table_open_on_name_in_mem(name, true);
|
|
|
|
err = dd_table_load_fk(client, name, nullptr, table, dd_table, thd, true,
|
|
true, &fk_tables);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
dd_table_close(table, NULL, NULL, true);
|
|
goto func_exit;
|
|
}
|
|
|
|
/* Check whether virtual column or stored column affects
|
|
the foreign key constraint of the table. */
|
|
|
|
if (dict_foreigns_has_s_base_col(table->foreign_set, table)) {
|
|
dd_table_close(table, NULL, NULL, true);
|
|
err = DB_NO_FK_ON_S_BASE_COL;
|
|
goto func_exit;
|
|
}
|
|
|
|
/* Fill the virtual column set in foreign when
|
|
the table undergoes copy alter operation. */
|
|
dict_mem_table_free_foreign_vcol_set(table);
|
|
dict_mem_table_fill_foreign_vcol_set(table);
|
|
|
|
dd_open_fk_tables(fk_tables, true, thd);
|
|
dd_table_close(table, NULL, NULL, true);
|
|
}
|
|
|
|
func_exit:
|
|
trx->op_info = "";
|
|
trx->dict_operation = TRX_DICT_OP_NONE;
|
|
|
|
return err;
|
|
}
|
|
|
|
/** Drops a table for MySQL as a background operation. MySQL relies on Unix
|
|
in ALTER TABLE to the fact that the table handler does not remove the
|
|
table before all handles to it has been removed. Furhermore, the MySQL's
|
|
call to drop table must be non-blocking. Therefore we do the drop table
|
|
as a background operation, which is taken care of by the master thread
|
|
in srv0srv.cc.
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_drop_table_for_mysql_in_background(
|
|
const char *name) /*!< in: table name */
|
|
{
|
|
dberr_t error;
|
|
trx_t *trx;
|
|
|
|
trx = trx_allocate_for_background();
|
|
|
|
/* If the original transaction was dropping a table referenced by
|
|
foreign keys, we must set the following to be able to drop the
|
|
table: */
|
|
|
|
trx->check_foreigns = false;
|
|
|
|
/* Check that there is enough reusable space in redo log files. */
|
|
log_free_check();
|
|
|
|
/* Try to drop the table in InnoDB */
|
|
|
|
error = row_drop_table_for_mysql(name, trx);
|
|
|
|
/* Flush the log to reduce probability that the .frm files and
|
|
the InnoDB data dictionary get out-of-sync if the user runs
|
|
with innodb_flush_log_at_trx_commit = 0 */
|
|
|
|
log_buffer_flush_to_disk();
|
|
|
|
trx_commit_for_mysql(trx);
|
|
|
|
trx_free_for_background(trx);
|
|
|
|
return (error);
|
|
}
|
|
|
|
/** TODO: NewDD: Need to check if there is need to keep background
|
|
drop, in such case, the thd would be NULL (no MDL can be acquired)
|
|
The master thread in srv0srv.cc calls this regularly to drop tables which
|
|
we must drop in background after queries to them have ended. Such lazy
|
|
dropping of tables is needed in ALTER TABLE on Unix.
|
|
@return how many tables dropped + remaining tables in list */
|
|
ulint row_drop_tables_for_mysql_in_background(void) {
|
|
row_mysql_drop_t *drop;
|
|
dict_table_t *table;
|
|
ulint n_tables;
|
|
ulint n_tables_dropped = 0;
|
|
THD *thd = current_thd;
|
|
loop:
|
|
mutex_enter(&row_drop_list_mutex);
|
|
|
|
ut_a(row_mysql_drop_list_inited);
|
|
|
|
drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
|
|
|
|
n_tables = UT_LIST_GET_LEN(row_mysql_drop_list);
|
|
|
|
mutex_exit(&row_drop_list_mutex);
|
|
|
|
if (drop == NULL) {
|
|
/* All tables dropped */
|
|
if (thd != nullptr) {
|
|
/* All these kind of table should not be
|
|
intrinsic ones, so this is no need later. */
|
|
UT_DELETE(thd_to_innodb_session(thd));
|
|
thd_to_innodb_session(thd) = NULL;
|
|
}
|
|
|
|
return (n_tables + n_tables_dropped);
|
|
}
|
|
|
|
DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
|
|
os_thread_sleep(5000000););
|
|
|
|
/* TODO: NewDD: we cannot get MDL lock here, as thd could be NULL */
|
|
table = dd_table_open_on_name(thd, NULL, drop->table_name, false,
|
|
DICT_ERR_IGNORE_NONE);
|
|
|
|
if (table == NULL) {
|
|
/* If for some reason the table has already been dropped
|
|
through some other mechanism, do not try to drop it */
|
|
|
|
goto already_dropped;
|
|
}
|
|
|
|
if (!table->to_be_dropped) {
|
|
/* There is a scenario: the old table is dropped
|
|
just after it's added into drop list, and new
|
|
table with the same name is created, then we try
|
|
to drop the new table in background. */
|
|
dd_table_close(table, NULL, NULL, false);
|
|
|
|
goto already_dropped;
|
|
}
|
|
|
|
ut_a(!table->can_be_evicted);
|
|
|
|
dd_table_close(table, NULL, NULL, false);
|
|
|
|
if (DB_SUCCESS != row_drop_table_for_mysql_in_background(drop->table_name)) {
|
|
/* If the DROP fails for some table, we return, and let the
|
|
main thread retry later */
|
|
|
|
if (thd != nullptr) {
|
|
/* All these kind of table should not be
|
|
intrinsic ones, so this is no need later. */
|
|
UT_DELETE(thd_to_innodb_session(thd));
|
|
thd_to_innodb_session(thd) = NULL;
|
|
}
|
|
|
|
return (n_tables + n_tables_dropped);
|
|
}
|
|
|
|
n_tables_dropped++;
|
|
|
|
already_dropped:
|
|
mutex_enter(&row_drop_list_mutex);
|
|
|
|
UT_LIST_REMOVE(row_mysql_drop_list, drop);
|
|
|
|
MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
|
|
|
|
ib::info(ER_IB_MSG_987) << "Dropped table "
|
|
<< ut_get_name(NULL, drop->table_name)
|
|
<< " in background drop queue.",
|
|
|
|
ut_free(drop->table_name);
|
|
|
|
ut_free(drop);
|
|
|
|
mutex_exit(&row_drop_list_mutex);
|
|
|
|
goto loop;
|
|
}
|
|
|
|
/** Get the background drop list length. NOTE: the caller must own the
|
|
drop list mutex!
|
|
@return how many tables in list */
|
|
ulint row_get_background_drop_list_len_low(void) {
|
|
ulint len;
|
|
|
|
mutex_enter(&row_drop_list_mutex);
|
|
|
|
ut_a(row_mysql_drop_list_inited);
|
|
|
|
len = UT_LIST_GET_LEN(row_mysql_drop_list);
|
|
|
|
mutex_exit(&row_drop_list_mutex);
|
|
|
|
return (len);
|
|
}
|
|
|
|
/** If a table is not yet in the drop list, adds the table to the list of tables
|
|
which the master thread drops in background. We need this on Unix because in
|
|
ALTER TABLE MySQL may call drop table even if the table has running queries on
|
|
it. Also, if there are running foreign key checks on the table, we drop the
|
|
table lazily.
|
|
@return true if the table was not yet in the drop list, and was added there */
|
|
static ibool row_add_table_to_background_drop_list(
|
|
const char *name) /*!< in: table name */
|
|
{
|
|
/* WL6049, remove after WL6049. */
|
|
ut_ad(0);
|
|
|
|
row_mysql_drop_t *drop;
|
|
|
|
mutex_enter(&row_drop_list_mutex);
|
|
|
|
ut_a(row_mysql_drop_list_inited);
|
|
|
|
/* Look if the table already is in the drop list */
|
|
for (drop = UT_LIST_GET_FIRST(row_mysql_drop_list); drop != NULL;
|
|
drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop)) {
|
|
if (strcmp(drop->table_name, name) == 0) {
|
|
/* Already in the list */
|
|
|
|
mutex_exit(&row_drop_list_mutex);
|
|
|
|
return (FALSE);
|
|
}
|
|
}
|
|
|
|
drop = static_cast<row_mysql_drop_t *>(
|
|
ut_malloc_nokey(sizeof(row_mysql_drop_t)));
|
|
|
|
drop->table_name = mem_strdup(name);
|
|
|
|
UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
|
|
|
|
MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE);
|
|
|
|
mutex_exit(&row_drop_list_mutex);
|
|
|
|
return (TRUE);
|
|
}
|
|
|
|
/** Reassigns the table identifier of a table.
|
|
@param[in,out] table table
|
|
@param[out] new_id new table id
|
|
@return error code or DB_SUCCESS */
|
|
static dberr_t row_mysql_table_id_reassign(dict_table_t *table,
|
|
table_id_t *new_id) {
|
|
dict_hdr_get_new_id(new_id, NULL, NULL, table, false);
|
|
|
|
/* Remove all locks except the table-level S and X locks. */
|
|
lock_remove_all_on_table(table, FALSE);
|
|
|
|
return (DB_SUCCESS);
|
|
}
|
|
|
|
/** Setup the pre-requisites for DISCARD TABLESPACE. It will start the
|
|
transaction, acquire the data dictionary lock in X mode and open the table.
|
|
@return table instance or 0 if not found. */
|
|
static dict_table_t *row_discard_tablespace_begin(
|
|
const char *name, /*!< in: table name */
|
|
trx_t *trx) /*!< in: transaction handle */
|
|
{
|
|
trx->op_info = "discarding tablespace";
|
|
|
|
// trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
|
|
|
|
trx_start_if_not_started_xa(trx, true);
|
|
|
|
/* Serialize data dictionary operations with dictionary mutex:
|
|
this is to avoid deadlocks during data dictionary operations */
|
|
|
|
row_mysql_lock_data_dictionary(trx);
|
|
|
|
dict_table_t *table;
|
|
THD *thd = current_thd;
|
|
|
|
table = dd_table_open_on_name(thd, NULL, name, true, DICT_ERR_IGNORE_NONE);
|
|
|
|
if (table) {
|
|
dict_stats_wait_bg_to_stop_using_table(table, trx);
|
|
ut_a(dict_table_is_file_per_table(table));
|
|
ut_a(table->n_foreign_key_checks_running == 0);
|
|
}
|
|
|
|
return (table);
|
|
}
|
|
|
|
/** Do the foreign key constraint checks.
|
|
@return DB_SUCCESS or error code. */
|
|
static dberr_t row_discard_tablespace_foreign_key_checks(
|
|
const trx_t *trx, /*!< in: transaction handle */
|
|
const dict_table_t *table) /*!< in: table to be discarded */
|
|
{
|
|
if (srv_read_only_mode || !trx->check_foreigns) {
|
|
return (DB_SUCCESS);
|
|
}
|
|
|
|
/* Check if the table is referenced by foreign key constraints from
|
|
some other table (not the table itself) */
|
|
dict_foreign_set::iterator it =
|
|
std::find_if(table->referenced_set.begin(), table->referenced_set.end(),
|
|
dict_foreign_different_tables());
|
|
|
|
if (it == table->referenced_set.end()) {
|
|
return (DB_SUCCESS);
|
|
}
|
|
|
|
const dict_foreign_t *foreign = *it;
|
|
FILE *ef = dict_foreign_err_file;
|
|
|
|
ut_ad(foreign->foreign_table != table);
|
|
ut_ad(foreign->referenced_table == table);
|
|
|
|
/* We only allow discarding a referenced table if
|
|
FOREIGN_KEY_CHECKS is set to 0 */
|
|
|
|
mutex_enter(&dict_foreign_err_mutex);
|
|
|
|
rewind(ef);
|
|
|
|
ut_print_timestamp(ef);
|
|
|
|
fputs(" Cannot DISCARD table ", ef);
|
|
ut_print_name(ef, trx, table->name.m_name);
|
|
fputs(
|
|
"\n"
|
|
"because it is referenced by ",
|
|
ef);
|
|
ut_print_name(ef, trx, foreign->foreign_table_name);
|
|
putc('\n', ef);
|
|
|
|
mutex_exit(&dict_foreign_err_mutex);
|
|
|
|
return (DB_CANNOT_DROP_CONSTRAINT);
|
|
}
|
|
|
|
/** Cleanup after the DISCARD TABLESPACE operation.
|
|
@param[in,out] trx transaction handle
|
|
@param[in,out] table table to be discarded
|
|
@param[in] err error code
|
|
@param[in,out] aux_vec fts aux table name vector
|
|
@return error code. */
|
|
static dberr_t row_discard_tablespace_end(trx_t *trx, dict_table_t *table,
|
|
dberr_t err,
|
|
aux_name_vec_t *aux_vec) {
|
|
bool file_per_table = true;
|
|
if (table != nullptr) {
|
|
file_per_table = dict_table_is_file_per_table(table);
|
|
dd_table_close(table, trx->mysql_thd, nullptr, true);
|
|
}
|
|
|
|
DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
|
|
log_make_latest_checkpoint();
|
|
DBUG_SUICIDE(););
|
|
|
|
DBUG_EXECUTE_IF("ib_discard_after_commit_crash", log_make_latest_checkpoint();
|
|
DBUG_SUICIDE(););
|
|
|
|
row_mysql_unlock_data_dictionary(trx);
|
|
|
|
if (aux_vec->aux_name.size() > 0) {
|
|
if (!fts_drop_dd_tables(aux_vec, file_per_table)) {
|
|
err = DB_ERROR;
|
|
}
|
|
fts_free_aux_names(aux_vec);
|
|
}
|
|
|
|
trx->op_info = "";
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Do the DISCARD TABLESPACE operation.
|
|
@param[in,out] trx transaction handle
|
|
@param[in,out] table table to be discarded
|
|
@param[in,out] aux_vec fts aux table name vector
|
|
@return DB_SUCCESS or error code. */
|
|
static dberr_t row_discard_tablespace(trx_t *trx, dict_table_t *table,
|
|
aux_name_vec_t *aux_vec) {
|
|
dberr_t err;
|
|
|
|
/* How do we prevent crashes caused by ongoing operations on
|
|
the table? Old operations could try to access non-existent
|
|
pages. MySQL will block all DML on the table using MDL and a
|
|
DISCARD will not start unless all existing operations on the
|
|
table to be discarded are completed.
|
|
|
|
1) Acquire the data dictionary latch in X mode. To prevent any
|
|
internal operations that MySQL is not aware off and also for
|
|
the internal SQL parser.
|
|
|
|
2) Purge and rollback: we assign a new table id for the
|
|
table. Since purge and rollback look for the table based on
|
|
the table id, they see the table as 'dropped' and discard
|
|
their operations.
|
|
|
|
3) Insert buffer: we remove all entries for the tablespace in
|
|
the insert buffer tree.
|
|
|
|
4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0,
|
|
we do not allow the discard. */
|
|
|
|
/* For SDI tables, acquire exclusive MDL and set sdi_table->ibd_file_missing
|
|
to true. Purge on SDI table acquire shared MDL & also check for missing
|
|
flag. */
|
|
mutex_exit(&dict_sys->mutex);
|
|
MDL_ticket *sdi_mdl = nullptr;
|
|
err = dd_sdi_acquire_exclusive_mdl(trx->mysql_thd, table->space, &sdi_mdl);
|
|
if (err != DB_SUCCESS) {
|
|
return (err);
|
|
}
|
|
mutex_enter(&dict_sys->mutex);
|
|
|
|
/* Play safe and remove all insert buffer entries, though we should
|
|
have removed them already when DISCARD TABLESPACE was called */
|
|
|
|
ibuf_delete_for_discarded_space(table->space);
|
|
|
|
table_id_t new_id;
|
|
|
|
/* Drop all the FTS auxiliary tables. */
|
|
if (dict_table_has_fts_index(table) ||
|
|
DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
|
|
fts_drop_tables(trx, table, aux_vec);
|
|
}
|
|
|
|
/* Assign a new space ID to the table definition so that purge
|
|
can ignore the changes. Update the system table on disk. */
|
|
|
|
err = row_mysql_table_id_reassign(table, &new_id);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
return (err);
|
|
}
|
|
|
|
/* Discard the physical file that is used for the tablespace. */
|
|
err = fil_discard_tablespace(table->space);
|
|
|
|
switch (err) {
|
|
case DB_SUCCESS:
|
|
case DB_IO_ERROR:
|
|
case DB_TABLESPACE_NOT_FOUND:
|
|
/* All persistent operations successful, update the
|
|
data dictionary memory cache. */
|
|
|
|
table->ibd_file_missing = TRUE;
|
|
|
|
table->flags2 |= DICT_TF2_DISCARDED;
|
|
|
|
dict_table_change_id_in_cache(table, new_id);
|
|
|
|
/* Reset the root page numbers. */
|
|
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(table->indexes); index != 0;
|
|
index = UT_LIST_GET_NEXT(indexes, index)) {
|
|
index->page = FIL_NULL;
|
|
index->space = FIL_NULL;
|
|
}
|
|
|
|
/* Set SDI tables that ibd is missing */
|
|
{
|
|
dict_table_t *sdi_table = dict_sdi_get_table(table->space, true, false);
|
|
if (sdi_table) {
|
|
sdi_table->ibd_file_missing = true;
|
|
dict_sdi_close_table(sdi_table);
|
|
}
|
|
}
|
|
|
|
/* If the tablespace did not already exist or we couldn't
|
|
write to it, we treat that as a successful DISCARD. It is
|
|
unusable anyway. */
|
|
|
|
err = DB_SUCCESS;
|
|
break;
|
|
|
|
default:
|
|
/* We need to rollback the disk changes, something failed. */
|
|
|
|
trx->error_state = DB_SUCCESS;
|
|
|
|
trx_rollback_to_savepoint(trx, NULL);
|
|
|
|
trx->error_state = DB_SUCCESS;
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Discards the tablespace of a table which stored in an .ibd file. Discarding
|
|
means that this function renames the .ibd file and assigns a new table id for
|
|
the table. Also the flag table->ibd_file_missing is set to TRUE.
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_discard_tablespace_for_mysql(
|
|
const char *name, /*!< in: table name */
|
|
trx_t *trx) /*!< in: transaction handle */
|
|
{
|
|
dberr_t err;
|
|
dict_table_t *table;
|
|
aux_name_vec_t aux_vec;
|
|
|
|
/* Open the table and start the transaction if not started. */
|
|
|
|
table = row_discard_tablespace_begin(name, trx);
|
|
|
|
if (table == 0) {
|
|
err = DB_TABLE_NOT_FOUND;
|
|
} else if (table->is_temporary()) {
|
|
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
|
|
ER_CANNOT_DISCARD_TEMPORARY_TABLE);
|
|
|
|
err = DB_ERROR;
|
|
|
|
} else if (table->space == TRX_SYS_SPACE) {
|
|
char table_name[MAX_FULL_NAME_LEN + 1];
|
|
|
|
innobase_format_name(table_name, sizeof(table_name), table->name.m_name);
|
|
|
|
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
|
|
ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
|
|
|
|
err = DB_ERROR;
|
|
|
|
} else if (table->n_foreign_key_checks_running > 0) {
|
|
char table_name[MAX_FULL_NAME_LEN + 1];
|
|
|
|
innobase_format_name(table_name, sizeof(table_name), table->name.m_name);
|
|
|
|
ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
|
|
ER_DISCARD_FK_CHECKS_RUNNING, table_name);
|
|
|
|
err = DB_ERROR;
|
|
|
|
} else {
|
|
/* Do foreign key constraint checks. */
|
|
|
|
err = row_discard_tablespace_foreign_key_checks(trx, table);
|
|
|
|
if (err == DB_SUCCESS) {
|
|
err = row_discard_tablespace(trx, table, &aux_vec);
|
|
}
|
|
}
|
|
|
|
return (row_discard_tablespace_end(trx, table, err, &aux_vec));
|
|
}
|
|
|
|
/** Sets an exclusive lock on a table.
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_mysql_lock_table(
|
|
trx_t *trx, /*!< in/out: transaction */
|
|
dict_table_t *table, /*!< in: table to lock */
|
|
enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
|
|
const char *op_info) /*!< in: string for trx->op_info */
|
|
{
|
|
mem_heap_t *heap;
|
|
que_thr_t *thr;
|
|
dberr_t err;
|
|
sel_node_t *node;
|
|
|
|
ut_ad(trx);
|
|
ut_ad(mode == LOCK_X || mode == LOCK_S);
|
|
|
|
heap = mem_heap_create(512);
|
|
|
|
trx->op_info = op_info;
|
|
|
|
node = sel_node_create(heap);
|
|
thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
|
|
thr->graph->state = QUE_FORK_ACTIVE;
|
|
|
|
/* We use the select query graph as the dummy graph needed
|
|
in the lock module call */
|
|
|
|
thr = que_fork_get_first_thr(
|
|
static_cast<que_fork_t *>(que_node_get_parent(thr)));
|
|
|
|
que_thr_move_to_run_state_for_mysql(thr, trx);
|
|
|
|
run_again:
|
|
thr->run_node = thr;
|
|
thr->prev_node = thr->common.parent;
|
|
|
|
err = lock_table(0, table, mode, thr);
|
|
|
|
trx->error_state = err;
|
|
|
|
if (err == DB_SUCCESS) {
|
|
que_thr_stop_for_mysql_no_error(thr, trx);
|
|
} else {
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
auto was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
|
|
|
|
if (was_lock_wait) {
|
|
goto run_again;
|
|
}
|
|
}
|
|
|
|
que_graph_free(thr->graph);
|
|
trx->op_info = "";
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Drop ancillary FTS tables as part of dropping a table.
|
|
@param[in,out] table Table cache entry
|
|
@param[in,out] aux_vec Fts aux table name vector
|
|
@param[in,out] trx Transaction handle
|
|
@return error code or DB_SUCCESS */
|
|
UNIV_INLINE
|
|
dberr_t row_drop_ancillary_fts_tables(dict_table_t *table,
|
|
aux_name_vec_t *aux_vec, trx_t *trx) {
|
|
/* Drop ancillary FTS tables */
|
|
if (dict_table_has_fts_index(table) ||
|
|
DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
|
|
ut_ad(table->get_ref_count() == 0);
|
|
ut_ad(trx_is_started(trx));
|
|
|
|
dberr_t err = fts_drop_tables(trx, table, aux_vec);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
ib::error(ER_IB_MSG_988) << " Unable to remove ancillary FTS"
|
|
" tables for table "
|
|
<< table->name << " : " << ut_strerr(err);
|
|
|
|
return (err);
|
|
}
|
|
}
|
|
|
|
/* The table->fts flag can be set on the table for which
|
|
the cluster index is being rebuilt. Such table might not have
|
|
DICT_TF2_FTS flag set. So keep this out of above
|
|
dict_table_has_fts_index condition */
|
|
if (table->fts != NULL) {
|
|
fts_free(table);
|
|
}
|
|
|
|
return (DB_SUCCESS);
|
|
}
|
|
|
|
/** Drop a table from the memory cache as part of dropping a table.
|
|
@param[in,out] table Table cache entry
|
|
@param[in,out] trx Transaction handle
|
|
@return error code or DB_SUCCESS */
|
|
UNIV_INLINE
|
|
dberr_t row_drop_table_from_cache(dict_table_t *table, trx_t *trx) {
|
|
/* Remove the pointer to this table object from the list
|
|
of modified tables by the transaction because the object
|
|
is going to be destroyed below. */
|
|
trx->mod_tables.erase(table);
|
|
|
|
if (!table->is_intrinsic()) {
|
|
btr_drop_ahi_for_table(table);
|
|
dict_table_remove_from_cache(table);
|
|
} else {
|
|
for (dict_index_t *index = UT_LIST_GET_FIRST(table->indexes); index != NULL;
|
|
index = UT_LIST_GET_FIRST(table->indexes)) {
|
|
rw_lock_free(&index->lock);
|
|
|
|
UT_LIST_REMOVE(table->indexes, index);
|
|
|
|
dict_mem_index_free(index);
|
|
}
|
|
|
|
dict_mem_table_free(table);
|
|
table = NULL;
|
|
}
|
|
|
|
return (DB_SUCCESS);
|
|
}
|
|
|
|
/** Drop a tablespace as part of dropping or renaming a table.
|
|
This deletes the fil_space_t if found and the file on disk.
|
|
@param[in] space_id Tablespace ID
|
|
@param[in] filepath File path of tablespace to delete
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_drop_tablespace(space_id_t space_id, const char *filepath) {
|
|
dberr_t err = DB_SUCCESS;
|
|
|
|
/* If the tablespace is not in the cache, just delete the file. */
|
|
if (!fil_space_exists_in_mem(space_id, nullptr, true, false, NULL, 0)) {
|
|
/* Force a delete of any discarded or temporary files. */
|
|
if (fil_delete_file(filepath)) {
|
|
ib::info(ER_IB_MSG_989) << "Removed datafile " << filepath;
|
|
|
|
} else {
|
|
ib::info(ER_IB_MSG_990)
|
|
<< "Failed to delete the datafile '" << filepath << "'!";
|
|
}
|
|
|
|
} else {
|
|
err = fil_delete_tablespace(space_id, BUF_REMOVE_FLUSH_NO_WRITE, nullptr);
|
|
|
|
if (err != DB_SUCCESS && err != DB_TABLESPACE_NOT_FOUND) {
|
|
ib::error(ER_IB_MSG_991)
|
|
<< "Failed to delete the datafile of"
|
|
<< " tablespace " << space_id << ", file '" << filepath << "'!";
|
|
}
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Drop a table for MySQL.
|
|
If the data dictionary was not already locked by the transaction,
|
|
the transaction will be committed. Otherwise, the data dictionary
|
|
will remain locked.
|
|
@param[in] name Table name
|
|
@param[in] trx Transaction handle
|
|
@param[in] nonatomic Whether it is permitted to release
|
|
and reacquire dict_operation_lock
|
|
@param[in,out] handler Table handler
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_drop_table_for_mysql(const char *name, trx_t *trx, bool nonatomic,
|
|
dict_table_t *handler) {
|
|
dberr_t err = DB_SUCCESS;
|
|
dict_table_t *table = NULL;
|
|
char *filepath = NULL;
|
|
bool locked_dictionary = false;
|
|
THD *thd = trx->mysql_thd;
|
|
dd::Table *table_def = nullptr;
|
|
bool file_per_table = false;
|
|
aux_name_vec_t aux_vec;
|
|
|
|
DBUG_TRACE;
|
|
DBUG_PRINT("row_drop_table_for_mysql", ("table: '%s'", name));
|
|
|
|
ut_a(name != NULL);
|
|
|
|
/* Serialize data dictionary operations with dictionary mutex:
|
|
no deadlocks can occur then in these operations */
|
|
|
|
trx->op_info = "dropping table";
|
|
|
|
if (handler != NULL && handler->is_intrinsic()) {
|
|
table = handler;
|
|
}
|
|
|
|
if (table == NULL) {
|
|
if (trx->dict_operation_lock_mode != RW_X_LATCH) {
|
|
/* Prevent foreign key checks etc. while we are
|
|
dropping the table */
|
|
|
|
row_mysql_lock_data_dictionary(trx);
|
|
|
|
locked_dictionary = true;
|
|
nonatomic = true;
|
|
}
|
|
|
|
ut_ad(mutex_own(&dict_sys->mutex));
|
|
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
|
|
|
|
table = dict_table_check_if_in_cache_low(name);
|
|
/* If it's called from server, then it should exist in cache */
|
|
if (table == NULL) {
|
|
/* MDL should already be held by server */
|
|
table = dd_table_open_on_name(
|
|
thd, NULL, name, true,
|
|
DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT);
|
|
} else {
|
|
table->acquire();
|
|
}
|
|
|
|
/* Need to exclusive lock all AUX tables for drop table */
|
|
if (table && table->fts) {
|
|
mutex_exit(&dict_sys->mutex);
|
|
err = fts_lock_all_aux_tables(thd, table);
|
|
mutex_enter(&dict_sys->mutex);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
dd_table_close(table, nullptr, nullptr, true);
|
|
goto funct_exit;
|
|
}
|
|
}
|
|
} else {
|
|
table->acquire();
|
|
ut_ad(table->is_intrinsic());
|
|
}
|
|
|
|
if (!table) {
|
|
err = DB_TABLE_NOT_FOUND;
|
|
goto funct_exit;
|
|
}
|
|
|
|
file_per_table = dict_table_is_file_per_table(table);
|
|
|
|
/* Acquire MDL on SDI table of tablespace. This is to prevent
|
|
concurrent DROP while purge is happening on SDI table */
|
|
if (file_per_table) {
|
|
MDL_ticket *sdi_mdl = nullptr;
|
|
mutex_exit(&dict_sys->mutex);
|
|
err = dd_sdi_acquire_exclusive_mdl(thd, table->space, &sdi_mdl);
|
|
mutex_enter(&dict_sys->mutex);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
dd_table_close(table, nullptr, nullptr, true);
|
|
goto funct_exit;
|
|
}
|
|
}
|
|
|
|
/* This function is called recursively via fts_drop_tables(). */
|
|
if (!trx_is_started(trx)) {
|
|
if (!table->is_temporary()) {
|
|
trx_start_if_not_started(trx, true);
|
|
} else {
|
|
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
|
|
}
|
|
}
|
|
|
|
/* Turn on this drop bit before we could release the dictionary
|
|
latch */
|
|
table->to_be_dropped = true;
|
|
|
|
if (nonatomic) {
|
|
/* This trx did not acquire any locks on dictionary
|
|
table records yet. Thus it is safe to release and
|
|
reacquire the data dictionary latches. */
|
|
if (table->fts) {
|
|
ut_ad(!table->fts->add_wq);
|
|
|
|
row_mysql_unlock_data_dictionary(trx);
|
|
fts_optimize_remove_table(table);
|
|
row_mysql_lock_data_dictionary(trx);
|
|
}
|
|
|
|
/* Do not bother to deal with persistent stats for temp
|
|
tables since we know temp tables do not use persistent
|
|
stats. */
|
|
if (!table->is_temporary()) {
|
|
dict_stats_wait_bg_to_stop_using_table(table, trx);
|
|
}
|
|
}
|
|
|
|
/* make sure background stats thread is not running on the table */
|
|
ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
|
|
|
|
if (!table->is_temporary() && !table->is_fts_aux()) {
|
|
if (srv_thread_is_active(srv_threads.m_dict_stats)) {
|
|
dict_stats_recalc_pool_del(table);
|
|
}
|
|
|
|
/* Remove stats for this table and all of its indexes from the
|
|
persistent storage if it exists and if there are stats for this
|
|
table in there. This function creates its own trx and commits
|
|
it. */
|
|
char errstr[1024];
|
|
err = dict_stats_drop_table(name, errstr, sizeof(errstr));
|
|
|
|
if (err != DB_SUCCESS) {
|
|
ib::warn(ER_IB_MSG_992) << errstr;
|
|
}
|
|
}
|
|
|
|
if (!table->is_intrinsic()) {
|
|
dict_table_prevent_eviction(table);
|
|
}
|
|
|
|
dd_table_close(table, thd, NULL, true);
|
|
|
|
/* Check if the table is referenced by foreign key constraints from
|
|
some other table now happens on SQL-layer. */
|
|
|
|
DBUG_EXECUTE_IF("row_drop_table_add_to_background",
|
|
row_add_table_to_background_drop_list(table->name.m_name);
|
|
err = DB_SUCCESS; goto funct_exit;);
|
|
|
|
/* TODO: could we replace the counter n_foreign_key_checks_running
|
|
with lock checks on the table? Acquire here an exclusive lock on the
|
|
table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
|
|
they can cope with the table having been dropped here? Foreign key
|
|
checks take an IS or IX lock on the table. */
|
|
|
|
if (table->n_foreign_key_checks_running > 0) {
|
|
const char *save_tablename = table->name.m_name;
|
|
ibool added;
|
|
|
|
added = row_add_table_to_background_drop_list(save_tablename);
|
|
|
|
if (added) {
|
|
ib::info(ER_IB_MSG_993) << "You are trying to drop table " << table->name
|
|
<< " though there is a foreign key check"
|
|
" running on it. Adding the table to the"
|
|
" background drop queue.";
|
|
|
|
/* We return DB_SUCCESS to MySQL though the drop will
|
|
happen lazily later */
|
|
|
|
err = DB_SUCCESS;
|
|
} else {
|
|
/* The table is already in the background drop list */
|
|
err = DB_ERROR;
|
|
}
|
|
|
|
goto funct_exit;
|
|
}
|
|
|
|
/* Remove all locks that are on the table or its records, if there
|
|
are no references to the table but it has record locks, we release
|
|
the record locks unconditionally. One use case is:
|
|
|
|
CREATE TABLE t2 (PRIMARY KEY (a)) SELECT * FROM t1;
|
|
|
|
If after the user transaction has done the SELECT and there is a
|
|
problem in completing the CREATE TABLE operation, MySQL will drop
|
|
the table. InnoDB will create a new background transaction to do the
|
|
actual drop, the trx instance that is passed to this function. To
|
|
preserve existing behaviour we remove the locks but ideally we
|
|
shouldn't have to. There should never be record locks on a table
|
|
that is going to be dropped. */
|
|
if (table->get_ref_count() == 0) {
|
|
/* We don't take lock on intrinsic table so nothing to remove.*/
|
|
if (!table->is_intrinsic()) {
|
|
lock_remove_all_on_table(table, TRUE);
|
|
}
|
|
ut_a(table->n_rec_locks == 0);
|
|
} else if (table->get_ref_count() > 0 || table->n_rec_locks > 0) {
|
|
ibool added;
|
|
|
|
ut_ad(0);
|
|
|
|
ut_ad(!table->is_intrinsic());
|
|
|
|
added = row_add_table_to_background_drop_list(table->name.m_name);
|
|
|
|
if (added) {
|
|
ib::info(ER_IB_MSG_994) << "MySQL is trying to drop table " << table->name
|
|
<< " though there are still open handles to"
|
|
" it. Adding the table to the background drop"
|
|
" queue.";
|
|
|
|
/* We return DB_SUCCESS to MySQL though the drop will
|
|
happen lazily later */
|
|
err = DB_SUCCESS;
|
|
} else {
|
|
/* The table is already in the background drop list */
|
|
err = DB_ERROR;
|
|
}
|
|
|
|
goto funct_exit;
|
|
}
|
|
|
|
/* The "to_be_dropped" marks table that is to be dropped, but
|
|
has not been dropped, instead, was put in the background drop
|
|
list due to being used by concurrent DML operations. Clear it
|
|
here since there are no longer any concurrent activities on it,
|
|
and it is free to be dropped */
|
|
table->to_be_dropped = false;
|
|
|
|
/* If we get this far then the table to be dropped must not have
|
|
any table or record locks on it. */
|
|
|
|
ut_a(table->is_intrinsic() || !lock_table_has_locks(table));
|
|
|
|
switch (trx_get_dict_operation(trx)) {
|
|
case TRX_DICT_OP_NONE:
|
|
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
|
|
case TRX_DICT_OP_TABLE:
|
|
break;
|
|
case TRX_DICT_OP_INDEX:
|
|
/* If the transaction was previously flagged as
|
|
TRX_DICT_OP_INDEX, we should be dropping auxiliary
|
|
tables for full-text indexes or temp tables. */
|
|
ut_ad(strstr(table->name.m_name, "/fts_") != NULL ||
|
|
strstr(table->name.m_name, TEMP_FILE_PREFIX_INNODB) != NULL);
|
|
}
|
|
|
|
if (!table->is_temporary() && !file_per_table) {
|
|
mutex_exit(&dict_sys->mutex);
|
|
for (dict_index_t *index = table->first_index();
|
|
err == DB_SUCCESS && index != NULL; index = index->next()) {
|
|
err = log_ddl->write_free_tree_log(trx, index, true);
|
|
}
|
|
mutex_enter(&dict_sys->mutex);
|
|
if (err != DB_SUCCESS) {
|
|
goto funct_exit;
|
|
}
|
|
}
|
|
|
|
/* Mark all indexes unavailable in the data dictionary cache
|
|
before starting to drop the table. */
|
|
for (dict_index_t *index = table->first_index(); index != NULL;
|
|
index = index->next()) {
|
|
page_no_t page;
|
|
|
|
rw_lock_x_lock(dict_index_get_lock(index));
|
|
page = index->page;
|
|
/* Mark the index unusable. */
|
|
index->page = FIL_NULL;
|
|
rw_lock_x_unlock(dict_index_get_lock(index));
|
|
|
|
if (table->is_temporary()) {
|
|
dict_drop_temporary_table_index(index, page);
|
|
}
|
|
}
|
|
|
|
err = DB_SUCCESS;
|
|
|
|
space_id_t space_id;
|
|
bool is_temp;
|
|
bool is_discarded;
|
|
bool shared_tablespace;
|
|
table_id_t table_id;
|
|
char *table_name;
|
|
|
|
space_id = table->space;
|
|
table_id = table->id;
|
|
is_discarded = dict_table_is_discarded(table);
|
|
is_temp = table->is_temporary();
|
|
shared_tablespace = DICT_TF_HAS_SHARED_SPACE(table->flags);
|
|
|
|
/* We do not allow temporary tables with a remote path. */
|
|
ut_a(!(is_temp && DICT_TF_HAS_DATA_DIR(table->flags)));
|
|
|
|
/* Make sure the data_dir_path is set if needed. */
|
|
dd_get_and_save_data_dir_path(table, table_def, true);
|
|
|
|
if (dict_table_has_fts_index(table) ||
|
|
DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
|
|
ut_ad(!is_temp);
|
|
|
|
err = row_drop_ancillary_fts_tables(table, &aux_vec, trx);
|
|
if (err != DB_SUCCESS) {
|
|
goto funct_exit;
|
|
}
|
|
}
|
|
|
|
/* Table space file name has been renamed in TRUNCATE. */
|
|
table_name = table->trunc_name.m_name;
|
|
if (table_name == nullptr) {
|
|
table_name = table->name.m_name;
|
|
} else {
|
|
table->trunc_name.m_name = nullptr;
|
|
}
|
|
|
|
/* Determine the tablespace filename before we drop
|
|
dict_table_t. Free this memory before returning. */
|
|
if (DICT_TF_HAS_DATA_DIR(table->flags)) {
|
|
auto dir = dict_table_get_datadir(table);
|
|
|
|
filepath = Fil_path::make(dir, table_name, IBD, true);
|
|
|
|
} else if (!shared_tablespace) {
|
|
filepath = Fil_path::make_ibd_from_table_name(table_name);
|
|
}
|
|
|
|
/* Free the dict_table_t object. */
|
|
err = row_drop_table_from_cache(table, trx);
|
|
if (err != DB_SUCCESS) {
|
|
ut_ad(0);
|
|
goto funct_exit;
|
|
}
|
|
|
|
if (!is_temp) {
|
|
log_ddl->write_drop_log(trx, table_id);
|
|
}
|
|
|
|
/* Do not attempt to drop known-to-be-missing tablespaces,
|
|
nor system or shared general tablespaces. */
|
|
if (is_discarded || is_temp || shared_tablespace ||
|
|
fsp_is_system_or_temp_tablespace(space_id)) {
|
|
goto funct_exit;
|
|
}
|
|
|
|
ut_ad(file_per_table);
|
|
err = log_ddl->write_delete_space_log(trx, nullptr, space_id, filepath, true,
|
|
true);
|
|
|
|
funct_exit:
|
|
|
|
ut_free(filepath);
|
|
|
|
if (locked_dictionary) {
|
|
row_mysql_unlock_data_dictionary(trx);
|
|
}
|
|
|
|
trx->op_info = "";
|
|
trx->dict_operation = TRX_DICT_OP_NONE;
|
|
|
|
if (aux_vec.aux_name.size() > 0) {
|
|
if (trx->dict_operation_lock_mode == RW_X_LATCH) {
|
|
mutex_exit(&dict_sys->mutex);
|
|
}
|
|
|
|
if (!fts_drop_dd_tables(&aux_vec, file_per_table)) {
|
|
err = DB_ERROR;
|
|
}
|
|
|
|
if (trx->dict_operation_lock_mode == RW_X_LATCH) {
|
|
mutex_enter(&dict_sys->mutex);
|
|
}
|
|
|
|
fts_free_aux_names(&aux_vec);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/** Tells whether a table name looks like the name of a MySQL temporary
table, i.e. whether the temporary file prefix occurs in it directly after
a '/' character.
@param[in]  name  table name to examine
@return true if "/" TEMP_FILE_PREFIX is found anywhere in name */
MY_ATTRIBUTE((warn_unused_result))
bool row_is_mysql_tmp_table_name(const char *name) {
  const char *prefix_pos = strstr(name, "/" TEMP_FILE_PREFIX);

  return (prefix_pos != nullptr);
}
|
|
|
|
/** Renames a table for MySQL.
|
|
@param[in] old_name old table name
|
|
@param[in] new_name new table name
|
|
@param[in] dd_table dd::Table for new table
|
|
@param[in,out] trx transaction
|
|
@param[in] replay whether in replay stage
|
|
@return error code or DB_SUCCESS */
|
|
dberr_t row_rename_table_for_mysql(const char *old_name, const char *new_name,
|
|
const dd::Table *dd_table, trx_t *trx,
|
|
bool replay) {
|
|
dict_table_t *table = NULL;
|
|
ibool dict_locked = FALSE;
|
|
dberr_t err = DB_ERROR;
|
|
mem_heap_t *heap = NULL;
|
|
const char **constraints_to_drop = NULL;
|
|
ulint n_constraints_to_drop = 0;
|
|
int retry;
|
|
|
|
ut_a(old_name != NULL);
|
|
ut_a(new_name != NULL);
|
|
ut_ad(trx->state == TRX_STATE_ACTIVE);
|
|
|
|
if (srv_force_recovery) {
|
|
ib::info(ER_IB_MSG_995) << MODIFICATIONS_NOT_ALLOWED_MSG_FORCE_RECOVERY;
|
|
return (DB_READ_ONLY);
|
|
}
|
|
|
|
trx->op_info = "renaming table";
|
|
|
|
const bool old_is_tmp = row_is_mysql_tmp_table_name(old_name);
|
|
const bool new_is_tmp = row_is_mysql_tmp_table_name(new_name);
|
|
THD *thd = trx->mysql_thd;
|
|
|
|
dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH;
|
|
|
|
/* thd could be NULL if these are FTS AUX tables */
|
|
table = dd_table_open_on_name(thd, NULL, old_name, dict_locked,
|
|
DICT_ERR_IGNORE_NONE);
|
|
if (!table) {
|
|
err = DB_TABLE_NOT_FOUND;
|
|
goto funct_exit;
|
|
|
|
} else if (table->ibd_file_missing && !dict_table_is_discarded(table)) {
|
|
err = DB_TABLE_NOT_FOUND;
|
|
|
|
ib::error(ER_IB_MSG_996) << "Table " << old_name
|
|
<< " does not have an .ibd"
|
|
" file in the database directory. "
|
|
<< TROUBLESHOOTING_MSG;
|
|
|
|
goto funct_exit;
|
|
|
|
} else if (new_is_tmp) {
|
|
/* MySQL is doing an ALTER TABLE command and it renames the
|
|
original table to a temporary table name. We want to preserve
|
|
the original foreign key constraint definitions despite the
|
|
name change. An exception is those constraints for which
|
|
the ALTER TABLE contained DROP FOREIGN KEY <foreign key id>.*/
|
|
|
|
heap = mem_heap_create(100);
|
|
|
|
err = dict_foreign_parse_drop_constraints(
|
|
heap, trx, table, &n_constraints_to_drop, &constraints_to_drop);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
goto funct_exit;
|
|
}
|
|
}
|
|
|
|
/* Is a foreign key check running on this table? */
|
|
for (retry = 0; retry < 100 && table->n_foreign_key_checks_running > 0;
|
|
++retry) {
|
|
row_mysql_unlock_data_dictionary(trx);
|
|
os_thread_yield();
|
|
row_mysql_lock_data_dictionary(trx);
|
|
}
|
|
|
|
if (table->n_foreign_key_checks_running > 0) {
|
|
ib::error(ER_IB_MSG_997) << "In ALTER TABLE " << ut_get_name(trx, old_name)
|
|
<< " a FOREIGN KEY check is running. Cannot rename"
|
|
" table.";
|
|
err = DB_TABLE_IN_FK_CHECK;
|
|
goto funct_exit;
|
|
}
|
|
|
|
err = DB_SUCCESS;
|
|
|
|
if (dict_table_has_fts_index(table) &&
|
|
!dict_tables_have_same_db(old_name, new_name)) {
|
|
err = fts_rename_aux_tables(table, new_name, trx, replay);
|
|
}
|
|
if (err != DB_SUCCESS) {
|
|
if (err == DB_DUPLICATE_KEY) {
|
|
ib::error(ER_IB_MSG_998) << "Possible reasons:";
|
|
ib::error(ER_IB_MSG_999) << "(1) Table rename would cause two"
|
|
" FOREIGN KEY constraints to have the same"
|
|
" internal name in case-insensitive"
|
|
" comparison.";
|
|
ib::error(ER_IB_MSG_1000)
|
|
<< "(2) Table " << ut_get_name(trx, new_name)
|
|
<< " exists in the InnoDB internal data"
|
|
" dictionary though MySQL is trying to rename"
|
|
" table "
|
|
<< ut_get_name(trx, old_name) << " to it.";
|
|
ib::info(ER_IB_MSG_1001) << TROUBLESHOOTING_MSG;
|
|
ib::error(ER_IB_MSG_1002)
|
|
<< "If table " << ut_get_name(trx, new_name)
|
|
<< " is a temporary table #sql..., then"
|
|
" it can be that there are still queries"
|
|
" running on the table, and it will be dropped"
|
|
" automatically when the queries end. You can"
|
|
" drop the orphaned table inside InnoDB by"
|
|
" creating an InnoDB table with the same name"
|
|
" in another database. Then MySQL thinks"
|
|
" the table exists, and DROP TABLE will"
|
|
" succeed.";
|
|
}
|
|
trx->error_state = DB_SUCCESS;
|
|
} else {
|
|
/* The following call will also rename the .ibd data file if
|
|
the table is stored in a single-table tablespace */
|
|
|
|
err = dict_table_rename_in_cache(table, new_name,
|
|
!table->refresh_fk && !new_is_tmp);
|
|
if (err != DB_SUCCESS) {
|
|
trx->error_state = DB_SUCCESS;
|
|
goto funct_exit;
|
|
}
|
|
|
|
/* In case of copy alter, template db_name and
|
|
table_name should be renamed only for newly
|
|
created table. */
|
|
if (table->vc_templ != NULL && !new_is_tmp) {
|
|
innobase_rename_vc_templ(table);
|
|
}
|
|
|
|
if (!dd_table) {
|
|
goto funct_exit;
|
|
}
|
|
|
|
/* We only want to switch off some of the type checking in
|
|
an ALTER TABLE...ALGORITHM=COPY, not in a RENAME. */
|
|
dict_names_t fk_tables;
|
|
|
|
THD *thd = current_thd;
|
|
dd::cache::Dictionary_client *client = dd::get_dd_client(thd);
|
|
dd::cache::Dictionary_client::Auto_releaser releaser(client);
|
|
|
|
/* If neither the old table, nor the new table is temporary, then it is a
|
|
table rename command. */
|
|
const bool is_rename = (!old_is_tmp && !new_is_tmp);
|
|
|
|
/* When table->refresh_fk is true, the foreign keys will be loaded when the
|
|
table is opened. */
|
|
if (is_rename || !table->refresh_fk) {
|
|
if (dict_locked) {
|
|
ut_ad(mutex_own(&dict_sys->mutex));
|
|
mutex_exit(&dict_sys->mutex);
|
|
}
|
|
|
|
err = dd_table_load_fk(client, new_name, nullptr, table, dd_table, thd,
|
|
false, !old_is_tmp || trx->check_foreigns,
|
|
&fk_tables);
|
|
|
|
if (dict_locked) {
|
|
mutex_enter(&dict_sys->mutex);
|
|
}
|
|
|
|
if (is_rename) {
|
|
/* Ensure that old renamed table names are not in this list. */
|
|
for (auto it = fk_tables.begin(); it != fk_tables.end();) {
|
|
if (strcmp(*it, old_name) == 0) {
|
|
it = fk_tables.erase(it);
|
|
} else {
|
|
++it;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (err != DB_SUCCESS) {
|
|
if (old_is_tmp) {
|
|
ib::error(ER_IB_MSG_1003)
|
|
<< "In ALTER TABLE " << ut_get_name(trx, new_name)
|
|
<< " has or is referenced in foreign"
|
|
" key constraints which are not"
|
|
" compatible with the new table"
|
|
" definition.";
|
|
} else {
|
|
ib::error(ER_IB_MSG_1004)
|
|
<< "In RENAME TABLE table " << ut_get_name(trx, new_name)
|
|
<< " is referenced in foreign key"
|
|
" constraints which are not compatible"
|
|
" with the new table definition.";
|
|
}
|
|
|
|
dberr_t error = dict_table_rename_in_cache(table, old_name, FALSE);
|
|
|
|
ut_a(error == DB_SUCCESS);
|
|
goto funct_exit;
|
|
}
|
|
/* Check whether virtual column or stored column affects
|
|
the foreign key constraint of the table. */
|
|
|
|
if (dict_foreigns_has_s_base_col(table->foreign_set, table)) {
|
|
err = DB_NO_FK_ON_S_BASE_COL;
|
|
dberr_t error = dict_table_rename_in_cache(table, old_name, FALSE);
|
|
|
|
ut_a(error == DB_SUCCESS);
|
|
goto funct_exit;
|
|
}
|
|
|
|
/* Fill the virtual column set in foreign when
|
|
the table undergoes copy alter operation. */
|
|
dict_mem_table_free_foreign_vcol_set(table);
|
|
dict_mem_table_fill_foreign_vcol_set(table);
|
|
|
|
dd_open_fk_tables(fk_tables, dict_locked, thd);
|
|
}
|
|
|
|
funct_exit:
|
|
if (table != NULL) {
|
|
dd_table_close(table, thd, NULL, dict_locked);
|
|
}
|
|
|
|
if (UNIV_LIKELY_NULL(heap)) {
|
|
mem_heap_free(heap);
|
|
}
|
|
|
|
trx->op_info = "";
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Read the total number of records in a consistent view.
|
|
@param[in,out] trx Covering transaction.
|
|
@param[in] indexes Indexes to scan.
|
|
@param[in] max_threads Maximum number of threads to use.
|
|
@param[out] n_rows Number of rows seen.
|
|
@param[out] n_del_mark RDS add, Number of rows read with delete marked.
|
|
@return DB_SUCCESS or error code. */
|
|
dberr_t row_mysql_parallel_select_count_star(
|
|
trx_t *trx, std::vector<dict_index_t *> &indexes, size_t max_threads,
|
|
ulint *n_rows, ulonglong *n_del_mark) {
|
|
ut_a(!indexes.empty());
|
|
using Shards = Counter::Shards<Parallel_reader::MAX_THREADS>;
|
|
|
|
Shards n_recs;
|
|
Counter::clear(n_recs);
|
|
|
|
Shards n_rows_read_del_mark{};
|
|
Counter::clear(n_rows_read_del_mark);
|
|
|
|
struct Check_interrupt {
|
|
byte m_pad[INNOBASE_CACHE_LINE_SIZE - (sizeof(size_t) + sizeof(void *))];
|
|
size_t m_count{};
|
|
const buf_block_t *m_prev_block{};
|
|
};
|
|
|
|
Check_interrupt checker[Parallel_reader::MAX_THREADS] = {};
|
|
|
|
Parallel_reader reader(max_threads);
|
|
|
|
ib::info() << "Parallel scan: " << max_threads;
|
|
|
|
const Parallel_reader::Scan_range FULL_SCAN;
|
|
|
|
// clang-format off
|
|
bool success{};
|
|
|
|
for (auto index : indexes) {
|
|
Parallel_reader::Config config(FULL_SCAN, index);
|
|
|
|
config.m_ptr_n_rows_read_del_mark = &n_rows_read_del_mark;
|
|
|
|
success =
|
|
reader.add_scan(trx, config, [&](const Parallel_reader::Ctx *ctx) {
|
|
Counter::inc(n_recs, ctx->m_thread_id);
|
|
|
|
auto &check = checker[ctx->m_thread_id];
|
|
|
|
if (ctx->m_block != check.m_prev_block) {
|
|
check.m_prev_block = ctx->m_block;
|
|
|
|
++check.m_count;
|
|
|
|
if (!(check.m_count % 64) && trx_is_interrupted(trx)) {
|
|
return (DB_INTERRUPTED);
|
|
}
|
|
}
|
|
return (DB_SUCCESS);
|
|
});
|
|
|
|
if (!success) {
|
|
break;
|
|
}
|
|
}
|
|
// clang-format on
|
|
|
|
auto err = success ? reader.run() : DB_ERROR;
|
|
|
|
if (err == DB_SUCCESS) {
|
|
Counter::for_each(n_recs, [=](const Counter::Type n) {
|
|
if (n > 0) {
|
|
*n_rows += n;
|
|
ib::info() << "n: " << n;
|
|
}
|
|
});
|
|
}
|
|
|
|
if (n_del_mark != nullptr && err == DB_SUCCESS) {
|
|
Counter::for_each(n_rows_read_del_mark, [=](const Counter::Type n) {
|
|
if (n > 0) *n_del_mark += n;
|
|
});
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Scan the rows in parallel.
|
|
@param[in,out] trx Transaction covering the scan.
|
|
@param[in] index (Cluster) Index to scan.
|
|
@param[in] max_threads Maximum threads to use for the scan.
|
|
@param[out] n_rows Number of rows seen.
|
|
@param[out] n_del_mark RDS add, Number of rows read with delete marked.
|
|
@return DB_SUCCESS or error code. */
|
|
static dberr_t parallel_check_table(trx_t *trx, dict_index_t *index,
|
|
size_t max_threads, ulint *n_rows,
|
|
ulonglong *n_del_mark = nullptr) {
|
|
using Shards = Counter::Shards<Parallel_reader::MAX_THREADS>;
|
|
|
|
Shards n_recs{};
|
|
Shards n_dups{};
|
|
Shards n_corrupt{};
|
|
|
|
Counter::clear(n_dups);
|
|
Counter::clear(n_recs);
|
|
Counter::clear(n_corrupt);
|
|
|
|
Shards n_rows_read_del_mark{};
|
|
Counter::clear(n_rows_read_del_mark);
|
|
|
|
using Tuples = std::vector<dtuple_t *, ut_allocator<dtuple_t *>>;
|
|
using Heaps = std::vector<mem_heap_t *, ut_allocator<mem_heap_t *>>;
|
|
using Blocks =
|
|
std::vector<const buf_block_t *, ut_allocator<const buf_block_t *>>;
|
|
|
|
Tuples prev_tuples;
|
|
Blocks prev_blocks;
|
|
|
|
Heaps heaps;
|
|
|
|
for (size_t i = 0; i < max_threads; ++i) {
|
|
heaps.push_back(mem_heap_create(100));
|
|
}
|
|
|
|
/* Check for transaction interrupted every 1000 rows. */
|
|
size_t counter = 1000;
|
|
|
|
Parallel_reader reader(max_threads);
|
|
|
|
Parallel_reader::Scan_range full_scan;
|
|
|
|
Parallel_reader::Config config(full_scan, index);
|
|
|
|
config.m_ptr_n_rows_read_del_mark = &n_rows_read_del_mark;
|
|
|
|
// clang-format off
|
|
auto success = reader.add_scan(
|
|
trx, config, [&](const Parallel_reader::Ctx* ctx) {
|
|
|
|
const auto rec = ctx->m_rec;
|
|
const auto block = ctx->m_block;
|
|
const auto id = ctx->m_thread_id;
|
|
|
|
Counter::inc(n_recs, id);
|
|
|
|
/* Only check the THD state for the first thread. */
|
|
if (id == 0) {
|
|
--counter;
|
|
|
|
if (counter == 0 && trx_is_interrupted(trx)) {
|
|
return (DB_INTERRUPTED);
|
|
}
|
|
|
|
counter = 1000;
|
|
}
|
|
|
|
auto heap = heaps[id];
|
|
|
|
if (ctx->m_start) {
|
|
/* Starting scan of a new range. We need to reset the previous tuple
|
|
because we don't know what the value of the previous last tuple was. */
|
|
prev_tuples[id] = nullptr;
|
|
}
|
|
|
|
auto prev_tuple = prev_tuples[id];
|
|
|
|
auto offsets = rec_get_offsets(rec, index, nullptr, ULINT_UNDEFINED, &heap);
|
|
|
|
if (prev_tuple != nullptr) {
|
|
ulint matched_fields = 0;
|
|
|
|
auto cmp = prev_tuple->compare(rec, index, offsets, &matched_fields);
|
|
|
|
/* In a unique secondary index we allow equal key values if
|
|
they contain SQL NULLs */
|
|
|
|
bool contains_null = false;
|
|
const auto n_ordering = dict_index_get_n_ordering_defined_by_user(index);
|
|
|
|
for (size_t i = 0; i < n_ordering; ++i) {
|
|
const auto nth_field = dtuple_get_nth_field(prev_tuple, i);
|
|
|
|
if (UNIV_SQL_NULL == dfield_get_len(nth_field)) {
|
|
contains_null = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (cmp > 0) {
|
|
Counter::inc(n_corrupt, id);
|
|
|
|
ib::error() << "Index records in a wrong order in " << index->name
|
|
<< " of table " << index->table->name << ": " << *prev_tuple
|
|
<< ", " << rec_offsets_print(rec, offsets);
|
|
/* Continue reading */
|
|
} else if (dict_index_is_unique(index) && !contains_null &&
|
|
matched_fields >=
|
|
dict_index_get_n_ordering_defined_by_user(index)) {
|
|
Counter::inc(n_dups, id);
|
|
|
|
ib::error() << "Duplicate key in " << index->name << " of table "
|
|
<< index->table->name << ": " << *prev_tuple << ", "
|
|
<< rec_offsets_print(rec, offsets);
|
|
}
|
|
}
|
|
|
|
if (prev_blocks[id] != block || prev_blocks[id] == nullptr) {
|
|
mem_heap_empty(heap);
|
|
offsets = rec_get_offsets(rec, index, nullptr, ULINT_UNDEFINED, &heap);
|
|
|
|
prev_blocks[id] = block;
|
|
}
|
|
|
|
ulint n_ext;
|
|
|
|
prev_tuples[id] = row_rec_to_index_entry(rec, index, offsets, &n_ext, heap);
|
|
|
|
return (DB_SUCCESS);
|
|
});
|
|
|
|
// clang-format off
|
|
|
|
dberr_t err;
|
|
|
|
if (success) {
|
|
prev_tuples.resize(max_threads);
|
|
prev_blocks.resize(max_threads);
|
|
|
|
err = reader.run();
|
|
} else {
|
|
err = DB_ERROR;
|
|
}
|
|
|
|
for (auto heap : heaps) {
|
|
mem_heap_free(heap);
|
|
}
|
|
|
|
if (Counter::total(n_dups) > 0) {
|
|
ib::error() << "Found " << Counter::total(n_dups) << " duplicate rows in "
|
|
<< index->name;
|
|
|
|
err = DB_DUPLICATE_KEY;
|
|
}
|
|
|
|
if (Counter::total(n_corrupt) > 0) {
|
|
ib::error() << "Found " << Counter::total(n_corrupt)
|
|
<< " rows in the wrong order in " << index->name;
|
|
|
|
err = DB_INDEX_CORRUPT;
|
|
}
|
|
|
|
*n_rows = Counter::total(n_recs);
|
|
if (n_del_mark != nullptr) {
|
|
Counter::for_each(n_rows_read_del_mark, [=](const Counter::Type n) {
|
|
if (n > 0) *n_del_mark += n;
|
|
});
|
|
}
|
|
|
|
return (err);
|
|
}
|
|
|
|
/** Scans an index, either just counting its rows (COUNT(*)) or also
checking the keys (CHECK TABLE). When the conditions below allow it the
scan is handed over to the parallel reader; otherwise the index is walked
serially with row_search_for_mysql().
@param[in,out]  prebuilt    prebuilt struct of the table handle; supplies
                            the transaction, the table and the lock type
@param[in,out]  index       index to scan; must not be a spatial index
@param[in]      n_threads   number of threads requested for a parallel scan
@param[in]      check_keys  true = also check that the records are in
                            ascending key order and that a unique index has
                            no duplicates (CHECK TABLE); false = count only
@param[out]     n_rows      number of rows seen
@return DB_SUCCESS or error code. With check_keys, DB_INDEX_CORRUPT is
returned if records were found in the wrong order and DB_DUPLICATE_KEY if
duplicates were found in a unique index; the scan still runs to the end of
the index before reporting them. */
dberr_t row_scan_index_for_mysql(row_prebuilt_t *prebuilt, dict_index_t *index,
                                 size_t n_threads, bool check_keys,
                                 ulint *n_rows) {
  *n_rows = 0;

  /* Don't support RTree Leaf level scan */
  ut_ad(!dict_index_is_spatial(index));

  if (index->is_clustered()) {
    /* The clustered index of a table is always available.
    During online ALTER TABLE that rebuilds the table, the
    clustered index in the old table will have
    index->online_log pointing to the new table. All
    indexes of the old table will remain valid and the new
    table will be inaccessible to MySQL until the
    completion of the ALTER TABLE. */
  } else if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
    /* Full Text index are implemented by auxiliary tables,
    not the B-tree. We also skip secondary indexes that are
    being created online. */
    return (DB_SUCCESS);
  }

  DBUG_EXECUTE_IF("ib_disable_parallel_read", goto skip_parallel_read;);

  /* No INSERT INTO ... SELECT and non-locking selects only; the parallel
  scan is attempted for the clustered index only. */
  if (prebuilt->trx->isolation_level > TRX_ISO_READ_UNCOMMITTED &&
      prebuilt->select_lock_type == LOCK_NONE && index->is_clustered() &&
      (check_keys || prebuilt->trx->mysql_n_tables_locked == 0) &&
      !prebuilt->ins_sel_stmt) {
    n_threads = Parallel_reader::available_threads(n_threads);

    if (n_threads > 0) {
      trx_start_if_not_started_xa(prebuilt->trx, false);

      trx_assign_read_view(prebuilt->trx);

      auto trx = prebuilt->trx;

      ut_a(prebuilt->table == index->table);

      /* RDS add: delete-marked rows are accounted in prebuilt, except
      for temporary tables. */
      ulonglong *n_del_mark = prebuilt->table->is_temporary()
                                  ? nullptr
                                  : &(prebuilt->rds_rows_read_del_mark);

      if (!check_keys) {
        std::vector<dict_index_t *> indexes;

        indexes.push_back(index);

        return (row_mysql_parallel_select_count_star(trx, indexes, n_threads,
                                                     n_rows, n_del_mark));
      }

      return (
          parallel_check_table(trx, index, n_threads, n_rows, n_del_mark));
    }
  }

#ifdef UNIV_DEBUG
skip_parallel_read:
#endif /* UNIV_DEBUG */

  ulint offsets_[REC_OFFS_NORMAL_SIZE];

  rec_offs_init(offsets_);

  /* Verdict of the key checks. It is kept apart from the result of
  row_search_for_mysql(), because every fetch of the next row overwrites
  that result and the scan deliberately goes on after a violation. */
  dberr_t check_err = DB_SUCCESS;

  dtuple_t *prev_entry = nullptr;
  ulint cnt = 1000;
  ulint bufsize = ut_max(UNIV_PAGE_SIZE, prebuilt->mysql_row_len);
  auto buf = static_cast<byte *>(ut_malloc_nokey(bufsize));
  auto heap = mem_heap_create(100);

  auto ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);

  for (;;) {
    /* Check thd->killed every 1,000 scanned rows */
    if (--cnt == 0) {
      if (trx_is_interrupted(prebuilt->trx)) {
        ret = DB_INTERRUPTED;
        break;
      }

      cnt = 1000;
    }

    if (ret != DB_SUCCESS) {
      switch (ret) {
        case DB_DEADLOCK:
        case DB_LOCK_TABLE_FULL:
        case DB_LOCK_WAIT_TIMEOUT:
        case DB_INTERRUPTED:
          /* These are passed on to the caller unchanged. */
          break;
        default: {
          const char *doing = check_keys ? "CHECK TABLE" : "COUNT(*)";
          ib::warn(ER_IB_MSG_1005) << doing << " on index " << index->name
                                   << " of"
                                      " table "
                                   << index->table->name << " returned " << ret;
        }
        /* fall through (this error is ignored by CHECK TABLE) */
        case DB_END_OF_INDEX:
          /* End of the scan: report what the key checks found. */
          ret = check_err;
          break;
      }

      /* Leave the scan loop. */
      break;
    }

    *n_rows = *n_rows + 1;

    if (check_keys) {
      /* This code is doing handler::check() for CHECK TABLE */

      /* row_search... returns the index record in buf, record origin
      offset within buf stored in the first 4 bytes, because we have
      built a dummy template */
      rec_t *rec = buf + mach_read_from_4(buf);

      ulint *offsets =
          rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap);

      if (prev_entry != nullptr) {
        ulint matched_fields = 0;

        const auto cmp =
            prev_entry->compare(rec, index, offsets, &matched_fields);

        /* In a unique secondary index we allow equal key values if
        they contain SQL NULLs */
        bool contains_null = false;
        const auto n_ordering =
            dict_index_get_n_ordering_defined_by_user(index);

        for (ulint i = 0; i < n_ordering; ++i) {
          if (UNIV_SQL_NULL ==
              dfield_get_len(dtuple_get_nth_field(prev_entry, i))) {
            contains_null = true;
            break;
          }
        }

        const char *msg = nullptr;

        if (cmp > 0) {
          /* A wrong record order takes precedence over duplicates. */
          check_err = DB_INDEX_CORRUPT;
          msg = "index records in a wrong order in ";
        } else if (dict_index_is_unique(index) && !contains_null &&
                   matched_fields >= n_ordering) {
          if (check_err == DB_SUCCESS) {
            check_err = DB_DUPLICATE_KEY;
          }
          msg = "duplicate key in ";
        }

        if (msg != nullptr) {
          ib::error(ER_IB_MSG_1006)
              << msg << index->name << " of table " << index->table->name
              << ": " << *prev_entry << ", " << rec_offsets_print(rec, offsets);
          /* Continue reading */
        }
      }

      mem_heap_t *tmp_heap = nullptr;

      /* Empty the heap on each round. But preserve offsets[]
      for the row_rec_to_index_entry() call, by copying them
      into a separate memory heap when needed. */
      if (UNIV_UNLIKELY(offsets != offsets_)) {
        ulint size = rec_offs_get_n_alloc(offsets) * sizeof *offsets;

        tmp_heap = mem_heap_create(size);

        offsets = static_cast<ulint *>(mem_heap_dup(tmp_heap, offsets, size));
      }

      mem_heap_empty(heap);

      ulint n_ext = 0;

      prev_entry = row_rec_to_index_entry(rec, index, offsets, &n_ext, heap);

      if (UNIV_LIKELY_NULL(tmp_heap)) {
        mem_heap_free(tmp_heap);
      }
    }

    ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
  }

  ut_free(buf);
  mem_heap_free(heap);

  return (ret);
}
|
|
|
|
/** Initialize this module */
|
|
void row_mysql_init(void) {
|
|
mutex_create(LATCH_ID_ROW_DROP_LIST, &row_drop_list_mutex);
|
|
|
|
UT_LIST_INIT(row_mysql_drop_list, &row_mysql_drop_t::row_mysql_drop_list);
|
|
|
|
row_mysql_drop_list_inited = TRUE;
|
|
}
|
|
|
|
/** Close this module */
|
|
void row_mysql_close(void) {
|
|
ut_a(UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
|
|
|
|
mutex_free(&row_drop_list_mutex);
|
|
|
|
row_mysql_drop_list_inited = FALSE;
|
|
}
|
|
|
|
/** Can a record buffer or a prefetch cache be utilized for prefetching
|
|
records in this scan?
|
|
@retval true if records can be prefetched
|
|
@retval false if records cannot be prefetched */
|
|
bool row_prebuilt_t::can_prefetch_records() const {
|
|
/* Inside an update, for example, we do not cache rows, since
|
|
we may use the cursor position to do the actual update, that
|
|
is why we require select_lock_type == LOCK_NONE. Since we keep
|
|
space in prebuilt only for the BLOBs of a single row, we
|
|
cannot cache rows in the case there are BLOBs in the fields to
|
|
be fetched. In HANDLER (note: the HANDLER statement, not the
|
|
handler class) we do not cache rows because there the cursor
|
|
is a scrollable cursor. */
|
|
return select_lock_type == LOCK_NONE && !m_no_prefetch &&
|
|
!templ_contains_blob && !templ_contains_fixed_point &&
|
|
!clust_index_was_generated && !used_in_HANDLER && !innodb_api &&
|
|
template_type != ROW_MYSQL_DUMMY_TEMPLATE && !in_fts_query;
|
|
}
|