/*
   Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation. The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */

/**
  @file sql/filesort.cc

  Standard external sort. We read rows into a buffer until there's no more
  room. At that point, we sort it (using the sorting algorithms from STL), and
  write it to disk (thus the name “filesort”). When there are no more rows, we
  merge chunks recursively, seven and seven (although we can go all the way up
  to 15 in the final pass if it helps us do one pass less).

  If all the rows fit in a single chunk, the data never hits disk, but remains
  in RAM.
 */
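
/*
  A minimal, self-contained sketch of the sort-then-merge idea described
  above (illustrative only, not server code: it sorts ints in in-memory
  "chunks" and merges with a min-heap, whereas the real code serializes
  sort keys to disk and merges up to MERGEBUFF/MERGEBUFF2 chunks at a time):

    #include <algorithm>
    #include <functional>
    #include <queue>
    #include <utility>
    #include <vector>

    std::vector<int> external_sort_sketch(std::vector<std::vector<int>> chunks) {
      // Phase 1: sort each chunk individually (the "fill buffer, sort,
      // spill" step).
      for (auto &chunk : chunks) std::sort(chunk.begin(), chunk.end());

      // Phase 2: k-way merge via a min-heap of (current value, chunk index).
      typedef std::pair<int, size_t> Cursor;
      std::priority_queue<Cursor, std::vector<Cursor>, std::greater<Cursor>>
          heap;
      std::vector<size_t> pos(chunks.size(), 0);
      for (size_t i = 0; i < chunks.size(); ++i)
        if (!chunks[i].empty()) heap.push(Cursor(chunks[i][0], i));

      std::vector<int> result;
      while (!heap.empty()) {
        Cursor top = heap.top();
        heap.pop();
        result.push_back(top.first);
        size_t i = top.second;
        if (++pos[i] < chunks[i].size())
          heap.push(Cursor(chunks[i][pos[i]], i));
      }
      return result;
    }
*/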

#include "sql/filesort.h"

#include <limits.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <atomic>
#include <memory>
#include <new>
#include <vector>

#include "add_with_saturate.h"
#include "binlog_config.h"
#include "decimal.h"
#include "field_types.h"  // enum_field_types
#include "m_ctype.h"
#include "map_helpers.h"
#include "my_basename.h"
#include "my_bitmap.h"
#include "my_byteorder.h"
#include "my_compiler.h"
#include "my_dbug.h"
#include "my_inttypes.h"
#include "my_loglevel.h"
#include "my_sys.h"
#include "mysql/components/services/log_builtins.h"
#include "mysql/components/services/log_shared.h"
#include "mysql/psi/mysql_file.h"
#include "mysql/service_mysql_alloc.h"
#include "mysql/udf_registration_types.h"
#include "mysql_com.h"
#include "mysqld_error.h"
#include "nullable.h"
#include "priority_queue.h"
#include "sql/auth/sql_security_ctx.h"
#include "sql/bounded_queue.h"
#include "sql/cmp_varlen_keys.h"
#include "sql/debug_sync.h"
#include "sql/derror.h"
#include "sql/error_handler.h"
#include "sql/field.h"
#include "sql/filesort_utils.h"
#include "sql/handler.h"
#include "sql/item.h"
#include "sql/item_subselect.h"
#include "sql/json_dom.h"  // Json_wrapper
#include "sql/key_spec.h"
#include "sql/malloc_allocator.h"
#include "sql/merge_many_buff.h"
#include "sql/my_decimal.h"
#include "sql/mysqld.h"  // mysql_tmpdir
#include "sql/opt_costmodel.h"
#include "sql/opt_trace.h"
#include "sql/opt_trace_context.h"
#include "sql/pfs_batch_mode.h"
#include "sql/psi_memory_key.h"
#include "sql/row_iterator.h"
#include "sql/sort_param.h"
#include "sql/sorting_iterator.h"
#include "sql/sql_array.h"
#include "sql/sql_base.h"
#include "sql/sql_bitmap.h"
#include "sql/sql_class.h"
#include "sql/sql_const.h"
#include "sql/sql_error.h"
#include "sql/sql_executor.h"  // QEP_TAB
#include "sql/sql_lex.h"
#include "sql/sql_optimizer.h"  // JOIN
#include "sql/sql_sort.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql/thr_malloc.h"
#include "sql_string.h"
#include "template_utils.h"

using Mysql::Nullable;
using std::max;
using std::min;

namespace {

struct Mem_compare_queue_key {
  Mem_compare_queue_key() : m_compare_length(0), m_param(nullptr) {}

  Mem_compare_queue_key(const Mem_compare_queue_key &that)
      : m_compare_length(that.m_compare_length), m_param(that.m_param) {}

  bool operator()(const uchar *s1, const uchar *s2) const {
    if (m_param)
      return cmp_varlen_keys(m_param->local_sortorder, m_param->use_hash, s1,
                             s2);

    // memcmp(s1, s2, 0) is guaranteed to return zero.
    return memcmp(s1, s2, m_compare_length) < 0;
  }

  size_t m_compare_length;
  Sort_param *m_param;
};
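
/*
  Illustrative use of the comparator above (a sketch, not server code): with
  m_param == nullptr it simply orders records by memcmp() over the first
  m_compare_length bytes, which is what makes memcmp-able sort keys so
  convenient to sort:

    std::vector<uchar *> keys = ...;    // each points at a fixed-size key
    Mem_compare_queue_key cmp;
    cmp.m_compare_length = key_length;  // assumed fixed key size
    cmp.m_param = nullptr;
    std::sort(keys.begin(), keys.end(), cmp);
*/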

}  // namespace

/* functions defined in this file */

static ha_rows read_all_rows(
    THD *thd, Sort_param *param, QEP_TAB *qep_tab, Filesort_info *fs_info,
    IO_CACHE *buffer_file, IO_CACHE *chunk_file,
    Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare_queue_key> *pq,
    RowIterator *source_iterator, ha_rows *found_rows, size_t *longest_key);
static int write_keys(Sort_param *param, Filesort_info *fs_info, uint count,
                      IO_CACHE *buffer_file, IO_CACHE *tempfile);
static void register_used_fields(Sort_param *param);
static int merge_index(THD *thd, Sort_param *param, Sort_buffer sort_buffer,
                       Merge_chunk_array chunk_array, IO_CACHE *tempfile,
                       IO_CACHE *outfile);
static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort,
                       Sort_result *sort_result);

static bool check_if_pq_applicable(Opt_trace_context *trace, Sort_param *param,
                                   Filesort_info *info, ha_rows records,
                                   ulong memory_available);

void Sort_param::decide_addon_fields(Filesort *file_sort, TABLE *table,
                                     ulong max_length_for_sort_data,
                                     bool force_sort_positions) {
  if (m_addon_fields_status != Addon_fields_status::unknown_status) {
    // Already decided.
    return;
  }

  // Generally, prefer using addon fields (ie., sorting rows instead of just
  // row IDs) if we can.
  //
  // NOTE: If the table is already in memory (e.g. the MEMORY engine; see the
  // HA_FAST_KEY_READ flag), it would normally be beneficial to sort row IDs
  // over rows to get smaller sort chunks. However, eliding the temporary table
  // entirely is even better than using row IDs, and only possible if we sort
  // rows. Furthermore, since we set up filesort at optimize time, we don't
  // know yet whether the source table would _remain_ in memory, or could be
  // spilled to disk using InnoDB. Thus, the only case that reasonably remains
  // where we'd want to use row IDs without being forced would be on user
  // (ie., non-temporary) MEMORY tables, which doesn't seem reasonable to add
  // complexity for.

  if (table->fulltext_searched) {
    m_addon_fields_status = Addon_fields_status::fulltext_searched;
  } else if (force_sort_positions) {
    m_addon_fields_status = Addon_fields_status::keep_rowid;
  } else {
    /*
      Get the descriptors of all fields whose values are appended
      to sorted fields and get its total length in m_addon_length.
    */
    addon_fields = file_sort->get_addon_fields(
        max_length_for_sort_data, table->field, m_fixed_sort_length,
        &m_addon_fields_status, &m_addon_length, &m_packable_length);
  }
}

void Sort_param::init_for_filesort(Filesort *file_sort,
                                   Bounds_checked_array<st_sort_field> sf_array,
                                   uint sortlen, TABLE *table,
                                   ulong max_length_for_sort_data,
                                   ha_rows maxrows, bool remove_duplicates) {
  m_fixed_sort_length = sortlen;
  m_force_stable_sort = file_sort->m_force_stable_sort;
  m_remove_duplicates = remove_duplicates;
  ref_length = table->file->ref_length;

  local_sortorder = sf_array;

  decide_addon_fields(file_sort, table, max_length_for_sort_data,
                      file_sort->m_force_sort_positions);
  if (using_addon_fields()) {
    fixed_res_length = m_addon_length;
  } else {
    fixed_res_length = ref_length;
    /*
      The reference to the record is considered
      as an additional sorted field
    */
    AddWithSaturate(ref_length, &m_fixed_sort_length);
  }

  m_num_varlen_keys = count_varlen_keys();
  m_num_json_keys = count_json_keys();
  if (using_varlen_keys()) {
    AddWithSaturate(size_of_varlength_field, &m_fixed_sort_length);
  }
  /*
    Add hash at the end of sort key to order cut values correctly.
    Needed for GROUPing, rather than for ORDERing.
  */
  if (using_json_keys()) {
    use_hash = true;
    AddWithSaturate(sizeof(ulonglong), &m_fixed_sort_length);
  }

  m_fixed_rec_length = AddWithSaturate(m_fixed_sort_length, m_addon_length);
  max_rows = maxrows;
}

void Sort_param::try_to_pack_addons(ulong max_length_for_sort_data) {
  if (!using_addon_fields() ||  // no addons, or
      using_packed_addons())    // already packed
    return;

  if (!Addon_fields::can_pack_addon_fields(fixed_res_length)) {
    m_addon_fields_status = Addon_fields_status::row_too_large;
    return;
  }
  const uint sz = Addon_fields::size_of_length_field;
  if (m_fixed_rec_length + sz > max_length_for_sort_data) {
    m_addon_fields_status = Addon_fields_status::row_too_large;
    return;
  }

  // Heuristic: skip packing if potential savings are less than 10 bytes.
  if (m_packable_length < (10 + sz)) {
    m_addon_fields_status = Addon_fields_status::skip_heuristic;
    return;
  }

  Addon_fields_array::iterator addonf = addon_fields->begin();
  for (; addonf != addon_fields->end(); ++addonf) {
    addonf->offset += sz;
    addonf->null_offset += sz;
  }
  addon_fields->set_using_packed_addons(true);
  m_using_packed_addons = true;

  m_addon_length += sz;
  fixed_res_length += sz;
  m_fixed_rec_length += sz;
}
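
/*
  A sketch of the payload layout after packing (illustrative; inferred from
  the offset shifts above and from how make_sortkey() writes the length
  field first, then the null-bit bytes, then the packed values):

    [sort key][length][null bits][packed addon field values...]
              ^-- Addon_fields::read_addon_length() reads this

  Shifting every addon offset and null_offset by size_of_length_field makes
  room for that leading length field, so variable-length values (VARCHARs
  etc.) occupy only what they need; packing is only attempted when the
  potential savings beat the 10-byte heuristic above.
*/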

int Sort_param::count_varlen_keys() const {
  int retval = 0;
  for (const auto &sf : local_sortorder) {
    if (sf.is_varlen) {
      ++retval;
    }
  }
  return retval;
}

int Sort_param::count_json_keys() const {
  int retval = 0;
  for (const auto &sf : local_sortorder) {
    if (sf.field_type == MYSQL_TYPE_JSON) {
      ++retval;
    }
  }
  return retval;
}

size_t Sort_param::get_record_length(uchar *p) const {
  uchar *start_of_payload = get_start_of_payload(p);
  uint size_of_payload = using_packed_addons()
                             ? Addon_fields::read_addon_length(start_of_payload)
                             : fixed_res_length;
  uchar *end_of_payload = start_of_payload + size_of_payload;
  return end_of_payload - p;
}

void Sort_param::get_rec_and_res_len(uchar *record_start, uint *recl,
                                     uint *resl) {
  if (!using_packed_addons() && !using_varlen_keys()) {
    *recl = m_fixed_rec_length;
    *resl = fixed_res_length;
    return;
  }
  uchar *plen = get_start_of_payload(record_start);
  if (using_packed_addons())
    *resl = Addon_fields::read_addon_length(plen);
  else
    *resl = fixed_res_length;
  DBUG_ASSERT(*resl <= fixed_res_length);
  const uchar *record_end = plen + *resl;
  *recl = static_cast<uint>(record_end - record_start);
}
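
/*
  For reference, the overall record layout that get_record_length() and
  get_rec_and_res_len() walk over looks roughly like this (a sketch; the
  varlength key prefix and the payload length field are each present only in
  the corresponding variable-size modes):

    [varlen key length][sort key (+ optional hash)][payload length][payload]
    ^ record_start                                 ^ get_start_of_payload()

  where the payload is either the addon fields or the row ID (ref_length
  bytes), so the record length is the distance from record_start to the end
  of the payload.
*/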

static void trace_filesort_information(Opt_trace_context *trace,
                                       const st_sort_field *sortorder,
                                       uint s_length) {
  if (!trace->is_started()) return;

  Opt_trace_array trace_filesort(trace, "filesort_information");
  for (; s_length--; sortorder++) {
    Opt_trace_object oto(trace);
    oto.add_alnum("direction", sortorder->reverse ? "desc" : "asc");

    if (sortorder->field) {
      TABLE *t = sortorder->field->table;
      if (strlen(t->alias) != 0)
        oto.add_utf8_table(t->pos_in_table_list);
      else
        oto.add_alnum("table", "intermediate_tmp_table");
      oto.add_alnum("field", sortorder->field->field_name
                                 ? sortorder->field->field_name
                                 : "tmp_table_column");
    } else
      oto.add("expression", sortorder->item);
  }
}

/**
  Sort a table.
  Creates a set of pointers that can be used to read the rows
  in sorted order. This should be done with the functions
  in records.cc.

  The result set is stored in fs_info->io_cache or
  fs_info->sorted_result, or left in the main filesort buffer.

  @param      thd             Current thread
  @param      filesort        How to sort the table
  @param      source_iterator Where to read the rows to be sorted from.
  @param      fs_info         Owns the buffers for sort_result.
  @param      sort_result     Where to store the sort result.
  @param[out] found_rows      Store the number of found rows here.
                              This is the number of found rows after
                              applying WHERE condition.

  @note
    If we sort by position (like if sort_positions is 1) filesort() will
    call table->prepare_for_position().

  @returns False if success, true if error
*/

bool filesort(THD *thd, Filesort *filesort, RowIterator *source_iterator,
              Filesort_info *fs_info, Sort_result *sort_result,
              ha_rows *found_rows) {
  int error;
  ulong memory_available = thd->variables.sortbuff_size;
  ha_rows num_rows_found = HA_POS_ERROR;
  ha_rows num_rows_estimate = HA_POS_ERROR;
  IO_CACHE tempfile;    // Temporary file for storing intermediate results.
  IO_CACHE chunk_file;  // For saving Merge_chunk structs.
  IO_CACHE *outfile;    // Contains the final, sorted result.
  Sort_param *param = &filesort->m_sort_param;
  QEP_TAB *const qep_tab = filesort->qep_tab;
  TABLE *const table = qep_tab->table();
  ha_rows max_rows = filesort->limit;
  uint s_length = 0;

  DBUG_TRACE;

  if (!(s_length = filesort->sort_order_length()))
    return true; /* purecov: inspected */

  DBUG_ASSERT(!table->reginfo.join_tab);
  DBUG_ASSERT(qep_tab == table->reginfo.qep_tab);
  Item_subselect *const subselect =
      qep_tab->join() ? qep_tab->join()->select_lex->master_unit()->item : NULL;

  DEBUG_SYNC(thd, "filesort_start");

  DBUG_ASSERT(sort_result->sorted_result == NULL);
  sort_result->sorted_result_in_fsbuf = false;

  outfile = sort_result->io_cache;
  my_b_clear(&tempfile);
  my_b_clear(&chunk_file);
  error = 1;

  // Make sure the source iterator is initialized before init_for_filesort(),
  // since table->file (and in particular, ref_length) may not be initialized
  // before that.
  DBUG_EXECUTE_IF("bug14365043_1", DBUG_SET("+d,ha_rnd_init_fail"););
  if (source_iterator->Init()) {
    return HA_POS_ERROR;
  }

  /*
    We need a nameless wrapper, since we may be inside the "steps" of
    "join_execution".
  */
  Opt_trace_context *const trace = &thd->opt_trace;
  Opt_trace_object trace_wrapper(trace);
  if (qep_tab->join())
    trace_wrapper.add("sorting_table_in_plan_at_position", qep_tab->idx());

  trace_filesort_information(trace, filesort->sortorder, s_length);

  param->init_for_filesort(filesort, make_array(filesort->sortorder, s_length),
                           sortlength(thd, filesort->sortorder, s_length),
                           table, thd->variables.max_length_for_sort_data,
                           max_rows, filesort->m_remove_duplicates);

  fs_info->addon_fields = param->addon_fields;

  /*
    TODO: Now that we read from RowIterators, the situation is a lot more
    complicated than just “quick is range scan, everything else is full scan”.
  */
  if (qep_tab->quick())
    thd->inc_status_sort_range();
  else
    thd->inc_status_sort_scan();

  if (table->file->inited) {
    if (table->s->tmp_table)
      table->file->info(HA_STATUS_VARIABLE);  // Get record count
    num_rows_estimate = table->file->estimate_rows_upper_bound();
  } else {
    // If number of rows is not known, use as much of sort buffer as possible.
    num_rows_estimate = HA_POS_ERROR;
  }

  Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare_queue_key> pq(
      param->max_record_length(),
      (Malloc_allocator<uchar *>(key_memory_Filesort_info_record_pointers)));

  // We would have liked to do this ahead-of-time so that we can show it
  // in EXPLAIN. However, we don't know the table->file->ref_length before
  // sorting time, which makes it hard to make the decision if we're sorting
  // row IDs. (If we sort rows, we would know, but it's not much use knowing
  // it ahead-of-time _sometimes_.)
  //
  // However, do note this cannot change the addon fields status,
  // so that we at least know that when checking whether we can skip
  // in-between temporary tables (StreamingIterator).
  if (check_if_pq_applicable(trace, param, fs_info, num_rows_estimate,
                             memory_available)) {
    DBUG_PRINT("info", ("filesort PQ is applicable"));
    /*
      For PQ queries (with limit) we know exactly how many pointers/records
      we have in the buffer, so to simplify things, we initialize
      all pointers here. (We cannot pack fields anyway, so there is no
      point in doing incremental allocation).
    */
    if (fs_info->preallocate_records(param->max_rows_per_buffer)) {
      my_error(ER_OUT_OF_SORTMEMORY, ME_FATALERROR);
      LogErr(ERROR_LEVEL, ER_SERVER_OUT_OF_SORTMEMORY);
      goto err;
    }

    if (pq.init(param->max_rows, param, fs_info->get_sort_keys())) {
      /*
        If we fail to init pq, we have to give up:
        out of memory means my_malloc() will call my_error().
      */
      DBUG_PRINT("info", ("failed to allocate PQ"));
      fs_info->free_sort_buffer();
      DBUG_ASSERT(thd->is_error());
      goto err;
    }
    filesort->using_pq = true;
    param->using_pq = true;
    param->m_addon_fields_status = Addon_fields_status::using_priority_queue;
  } else {
    DBUG_PRINT("info", ("filesort PQ is not applicable"));
    filesort->using_pq = false;
    param->using_pq = false;

    /*
      When sorting using priority queue, we cannot use packed addons.
      Without PQ, we can try.
    */
    param->try_to_pack_addons(thd->variables.max_length_for_sort_data);

    /*
      NOTE: param->max_rows_per_buffer is merely informative (for optimizer
      trace) in this case, not actually used.
    */
    if (num_rows_estimate < MERGEBUFF2) num_rows_estimate = MERGEBUFF2;
    ha_rows keys =
        memory_available / (param->max_record_length() + sizeof(char *));
    param->max_rows_per_buffer =
        min(num_rows_estimate > 0 ? num_rows_estimate : 1, keys);

    fs_info->set_max_size(memory_available, param->max_record_length());
  }

  param->sort_form = table;
  size_t longest_key;

  // New scope, because subquery execution must be traced within an array.
  {
    Opt_trace_array ota(trace, "filesort_execution");
    num_rows_found = read_all_rows(thd, param, qep_tab, fs_info, &chunk_file,
                                   &tempfile, param->using_pq ? &pq : nullptr,
                                   source_iterator, found_rows, &longest_key);
    if (num_rows_found == HA_POS_ERROR) goto err;
  }

  size_t num_chunks, num_initial_chunks;
  if (my_b_inited(&chunk_file)) {
    num_chunks =
        static_cast<size_t>(my_b_tell(&chunk_file)) / sizeof(Merge_chunk);
  } else {
    num_chunks = 0;
  }

  num_initial_chunks = num_chunks;

  if (num_chunks == 0)  // The whole set is in memory
  {
    ha_rows rows_in_chunk =
        param->using_pq ? pq.num_elements() : num_rows_found;
    if (save_index(param, rows_in_chunk, fs_info, sort_result)) goto err;
  } else {
    // If deduplicating, we'll need to remember the previous key somehow.
    if (filesort->m_remove_duplicates) {
      param->m_last_key_seen =
          static_cast<uchar *>(thd->mem_root->Alloc(longest_key));
    }

    // We will need an extra buffer in SortFileIndirectIterator
    if (fs_info->addon_fields != nullptr &&
        !(fs_info->addon_fields->allocate_addon_buf(param->m_addon_length)))
      goto err; /* purecov: inspected */

    fs_info->read_chunk_descriptors(&chunk_file, num_chunks);
    if (fs_info->merge_chunks.is_null()) goto err; /* purecov: inspected */

    close_cached_file(&chunk_file);

    /* Open cached file if it isn't open */
    if (!my_b_inited(outfile) &&
        open_cached_file(outfile, mysql_tmpdir, TEMP_PREFIX, READ_RECORD_BUFFER,
                         MYF(MY_WME)))
      goto err;
    if (reinit_io_cache(outfile, WRITE_CACHE, 0L, 0, 0)) goto err;

    param->max_rows_per_buffer = static_cast<uint>(
        fs_info->max_size_in_bytes() / param->max_record_length());

    Bounds_checked_array<uchar> merge_buf = fs_info->get_contiguous_buffer();
    if (merge_buf.array() == nullptr) {
      my_error(ER_OUT_OF_SORTMEMORY, ME_FATALERROR);
      LogErr(ERROR_LEVEL, ER_SERVER_OUT_OF_SORTMEMORY);
      goto err;
    }
    if (merge_many_buff(thd, param, merge_buf, fs_info->merge_chunks,
                        &num_chunks, &tempfile))
      goto err;
    if (flush_io_cache(&tempfile) ||
        reinit_io_cache(&tempfile, READ_CACHE, 0L, 0, 0))
      goto err;
    if (merge_index(
            thd, param, merge_buf,
            Merge_chunk_array(fs_info->merge_chunks.begin(), num_chunks),
            &tempfile, outfile))
      goto err;

    sort_result->found_records = num_rows_found;
  }

  if (trace->is_started()) {
    char buffer[100];
    String sort_mode(buffer, sizeof(buffer), &my_charset_bin);
    sort_mode.length(0);
    sort_mode.append("<");
    if (param->using_varlen_keys())
      sort_mode.append("varlen_sort_key");
    else
      sort_mode.append("fixed_sort_key");
    sort_mode.append(", ");
    sort_mode.append(param->using_packed_addons()
                         ? "packed_additional_fields"
                         : param->using_addon_fields() ? "additional_fields"
                                                       : "rowid");
    sort_mode.append(">");

    const char *algo_text[] = {"none", "std::sort", "std::stable_sort"};

    Opt_trace_object filesort_summary(trace, "filesort_summary");
    filesort_summary.add("memory_available", memory_available)
        .add("key_size", param->max_compare_length())
        .add("row_size", param->max_record_length())
        .add("max_rows_per_buffer", param->max_rows_per_buffer)
        .add("num_rows_estimate", num_rows_estimate)
        .add("num_rows_found", num_rows_found)
        .add("num_initial_chunks_spilled_to_disk", num_initial_chunks)
        .add("peak_memory_used", fs_info->peak_memory_used())
        .add_alnum("sort_algorithm", algo_text[param->m_sort_algorithm]);
    if (!param->using_packed_addons())
      filesort_summary.add_alnum(
          "unpacked_addon_fields",
          addon_fields_text(param->m_addon_fields_status));
    filesort_summary.add_alnum("sort_mode", sort_mode.c_ptr());
  }

  if (num_rows_found > param->max_rows) {
    // If read_all_rows() produced more results than the query LIMIT.
    num_rows_found = param->max_rows;
  }
  error = 0;

err:
  if (!subselect || !subselect->is_uncacheable()) {
    if (!sort_result->sorted_result_in_fsbuf) fs_info->free_sort_buffer();
    my_free(fs_info->merge_chunks.array());
    fs_info->merge_chunks = Merge_chunk_array(NULL, 0);
  }
  close_cached_file(&tempfile);
  close_cached_file(&chunk_file);
  if (my_b_inited(outfile)) {
    if (flush_io_cache(outfile)) error = 1;
    {
      my_off_t save_pos = outfile->pos_in_file;
      /* For following reads */
      if (reinit_io_cache(outfile, READ_CACHE, 0L, 0, 0)) error = 1;
      outfile->end_of_file = save_pos;
    }
  }
  if (error) {
    DBUG_ASSERT(thd->is_error() || thd->killed);

    /*
      Guard against Bug#11745656 -- KILL QUERY should not send "server
      shutdown" to client!
    */
    const char *cause = thd->killed
                            ? ((thd->killed == THD::KILL_CONNECTION &&
                                !connection_events_loop_aborted())
                                   ? ER_THD_NONCONST(thd, THD::KILL_QUERY)
                                   : ER_THD_NONCONST(thd, thd->killed))
                            : thd->get_stmt_da()->message_text();
    const char *msg = ER_THD(thd, ER_FILESORT_TERMINATED);

    my_printf_error(ER_FILSORT_ABORT, "%s: %s", MYF(0), msg, cause);

    if (thd->is_fatal_error()) {
      LogEvent()
          .type(LOG_TYPE_ERROR)
          .subsys(LOG_SUBSYSTEM_TAG)
          .prio(INFORMATION_LEVEL)
          .errcode(ER_FILESORT_TERMINATED)
          .user(thd->security_context()->priv_user())
          .host(thd->security_context()->host_or_ip())
          .thread_id(thd->thread_id())
          .message(
              "%s, host: %s, user: %s, thread: %u, error: %s, "
              "query: %-.4096s",
              msg, thd->security_context()->host_or_ip().str,
              thd->security_context()->priv_user().str, thd->thread_id(), cause,
              thd->query().str);
    }
  } else
    thd->inc_status_sort_rows(num_rows_found);

  return error;
} /* filesort */

void filesort_free_buffers(TABLE *table, bool full) {
  DBUG_TRACE;

  table->unique_result.sorted_result.reset();
  DBUG_ASSERT(!table->unique_result.sorted_result_in_fsbuf);
  table->unique_result.sorted_result_in_fsbuf = false;

  if (full) {
    if (table->sorting_iterator != nullptr) {
      table->sorting_iterator->CleanupAfterQuery();
    }
    if (table->duplicate_removal_iterator != nullptr) {
      table->duplicate_removal_iterator->CleanupAfterQuery();
    }
  }
}

Filesort::Filesort(THD *thd, QEP_TAB *tab_arg, ORDER *order, ha_rows limit_arg,
                   bool force_stable_sort, bool remove_duplicates,
                   bool sort_positions)
    : m_thd(thd),
      qep_tab(tab_arg),
      limit(limit_arg),
      sortorder(NULL),
      using_pq(false),
      m_force_stable_sort(
          force_stable_sort),  // keep relative order of equiv. elts
      m_remove_duplicates(remove_duplicates),
      m_force_sort_positions(sort_positions) {
  // Switch to the right slice if applicable, so that we fetch out the correct
  // items from order_arg.
  if (qep_tab->join() != nullptr) {
    DBUG_ASSERT(qep_tab->join()->m_ordered_index_usage !=
                (order == qep_tab->join()->order
                     ? JOIN::ORDERED_INDEX_ORDER_BY
                     : JOIN::ORDERED_INDEX_GROUP_BY));
    Switch_ref_item_slice slice_switch(qep_tab->join(),
                                       qep_tab->ref_item_slice);
    m_sort_order_length = make_sortorder(order);
  } else {
    m_sort_order_length = make_sortorder(order);
  }
}

uint Filesort::make_sortorder(ORDER *order) {
  uint count;
  st_sort_field *sort, *pos;
  ORDER *ord;
  DBUG_TRACE;

  count = 0;
  for (ord = order; ord; ord = ord->next) count++;
  DBUG_ASSERT(count > 0);

  const size_t sortorder_size = sizeof(*sortorder) * (count + 1);
  if (sortorder == nullptr)
    sortorder =
        static_cast<st_sort_field *>((*THR_MALLOC)->Alloc(sortorder_size));
  if (sortorder == nullptr) return 0; /* purecov: inspected */
  memset(sortorder, 0, sortorder_size);

  pos = sort = sortorder;
  for (ord = order; ord; ord = ord->next, pos++) {
    Item *const item = ord->item[0], *const real_item = item->real_item();
    if (real_item->type() == Item::FIELD_ITEM) {
      /*
        Could be a field, or Item_view_ref/Item_ref wrapping a field
        If it is an Item_outer_ref, only_full_group_by has been switched off.
      */
      DBUG_ASSERT(
          item->type() == Item::FIELD_ITEM ||
          (item->type() == Item::REF_ITEM &&
           (down_cast<Item_ref *>(item)->ref_type() == Item_ref::VIEW_REF ||
            down_cast<Item_ref *>(item)->ref_type() == Item_ref::OUTER_REF ||
            down_cast<Item_ref *>(item)->ref_type() == Item_ref::REF)));
      pos->field = down_cast<Item_field *>(real_item)->field;
    } else if (real_item->type() == Item::SUM_FUNC_ITEM &&
               !real_item->const_item()) {
      // Aggregate, or Item_aggregate_ref
      DBUG_ASSERT(item->type() == Item::SUM_FUNC_ITEM ||
                  (item->type() == Item::REF_ITEM &&
                   static_cast<Item_ref *>(item)->ref_type() ==
                       Item_ref::AGGREGATE_REF));
      pos->field = item->get_tmp_table_field();
    } else if (real_item->type() == Item::COPY_STR_ITEM) {  // Blob patch
      pos->item = static_cast<Item_copy *>(real_item)->get_item();
    } else
      pos->item = item;
    pos->reverse = (ord->direction == ORDER_DESC);
    DBUG_ASSERT(pos->field != NULL || pos->item != NULL);
    DBUG_PRINT("info", ("sorting on %s: %s", (pos->field ? "field" : "item"),
                        (pos->field ? pos->field->field_name : "")));
  }
  return count;
}

void Filesort_info::read_chunk_descriptors(IO_CACHE *chunk_file, uint count) {
  DBUG_TRACE;

  // If we already have a chunk array, we're doing sort in a subquery.
  if (!merge_chunks.is_null() && merge_chunks.size() < count) {
    my_free(merge_chunks.array());             /* purecov: inspected */
    merge_chunks = Merge_chunk_array(NULL, 0); /* purecov: inspected */
  }

  void *rawmem = merge_chunks.array();
  const size_t length = sizeof(Merge_chunk) * count;
  if (NULL == rawmem) {
    rawmem = my_malloc(key_memory_Filesort_info_merge, length, MYF(MY_WME));
    if (rawmem == NULL) return; /* purecov: inspected */
  }

  if (reinit_io_cache(chunk_file, READ_CACHE, 0L, 0, 0) ||
      my_b_read(chunk_file, static_cast<uchar *>(rawmem), length)) {
    my_free(rawmem); /* purecov: inspected */
    rawmem = NULL;   /* purecov: inspected */
    count = 0;       /* purecov: inspected */
  }

  merge_chunks = Merge_chunk_array(static_cast<Merge_chunk *>(rawmem), count);
}

#ifndef DBUG_OFF
/*
  Print a text, SQL-like record representation into dbug trace.

  Note: this function is a work in progress: at the moment
    - column read bitmap is ignored (can print garbage for unused columns)
    - there is no quoting
*/
static void dbug_print_record(TABLE *table, bool print_rowid) {
  char buff[1024];
  Field **pfield;
  String tmp(buff, sizeof(buff), &my_charset_bin);
  DBUG_LOCK_FILE;

  fprintf(DBUG_FILE, "record (");
  for (pfield = table->field; *pfield; pfield++)
    fprintf(DBUG_FILE, "%s%s", (*pfield)->field_name, (pfield[1]) ? ", " : "");
  fprintf(DBUG_FILE, ") = ");

  fprintf(DBUG_FILE, "(");
  for (pfield = table->field; *pfield; pfield++) {
    Field *field = *pfield;

    if (field->is_null()) {
      if (fwrite("NULL", sizeof(char), 4, DBUG_FILE) != 4) {
        goto unlock_file_and_quit;
      }
    }

    if (field->type() == MYSQL_TYPE_BIT)
      (void)field->val_int_as_str(&tmp, 1);
    else
      field->val_str(&tmp);

    if (fwrite(tmp.ptr(), sizeof(char), tmp.length(), DBUG_FILE) !=
        tmp.length()) {
      goto unlock_file_and_quit;
    }

    if (pfield[1]) {
      if (fwrite(", ", sizeof(char), 2, DBUG_FILE) != 2) {
        goto unlock_file_and_quit;
      }
    }
  }
  fprintf(DBUG_FILE, ")");
  if (print_rowid) {
    fprintf(DBUG_FILE, " rowid ");
    for (uint i = 0; i < table->file->ref_length; i++) {
      fprintf(DBUG_FILE, "%x", table->file->ref[i]);
    }
  }
  fprintf(DBUG_FILE, "\n");
unlock_file_and_quit:
  DBUG_UNLOCK_FILE;
}
#endif

/// Error handler for filesort.
class Filesort_error_handler : public Internal_error_handler {
  THD *m_thd;  ///< The THD in which filesort is executed.
  bool m_seen_not_supported;  ///< Whether a not-supported warning has been
                              ///< seen.
 public:
  /**
    Create an error handler and push it onto the error handler
    stack. The handler will be automatically popped from the error
    handler stack when it is destroyed.
  */
  Filesort_error_handler(THD *thd) : m_thd(thd), m_seen_not_supported(false) {
    thd->push_internal_handler(this);
  }

  /**
    Pop the error handler from the error handler stack, and destroy
    it.
  */
  ~Filesort_error_handler() { m_thd->pop_internal_handler(); }

  /**
    Handle a condition.

    The handler will make sure that no more than a single
    ER_NOT_SUPPORTED_YET warning will be seen by the higher
    layers. This warning is generated by Json_wrapper::make_sort_key()
    for every value that it doesn't know how to create a sort key
    for. It is sufficient for the higher layers to report this warning
    only once per sort.
  */
  virtual bool handle_condition(THD *, uint sql_errno, const char *,
                                Sql_condition::enum_severity_level *level,
                                const char *) {
    if (*level == Sql_condition::SL_WARNING &&
        sql_errno == ER_NOT_SUPPORTED_YET) {
      if (m_seen_not_supported) return true;
      m_seen_not_supported = true;
    }

    return false;
  }
};

static bool alloc_and_make_sortkey(Sort_param *param, Filesort_info *fs_info,
                                   uchar *ref_pos, size_t *key_length) {
  size_t min_bytes = 1;
  for (;;) {  // Termination condition within loop.
    Bounds_checked_array<uchar> sort_key_buf =
        fs_info->get_next_record_pointer(min_bytes);
    if (sort_key_buf.array() == nullptr) return true;
    const uint rec_sz = param->make_sortkey(sort_key_buf, ref_pos);
    if (rec_sz > sort_key_buf.size()) {
      // The record wouldn't fit. Try again, asking for a larger buffer.
      min_bytes = sort_key_buf.size() + 1;
    } else {
      fs_info->commit_used_memory(rec_sz);
      *key_length = rec_sz;
      return false;
    }
  }
}

/**
  Read all rows, and write them into a temporary file
  (if we run out of space in the sort buffer).
  All produced sequences are guaranteed to be non-empty.

  @param thd               Thread handle
  @param param             Sorting parameter
  @param qep_tab           Parameters for which data to read (see
                           source_iterator).
  @param fs_info           Struct containing sort buffer etc.
  @param chunk_file        File to write Merge_chunks describing sorted
                           segments in tempfile.
  @param tempfile          File to write sorted sequences of sortkeys to.
  @param pq                If !NULL, use it for keeping top N elements
  @param source_iterator   Where to read the rows to be sorted from.
  @param [out] found_rows  The number of FOUND_ROWS().
                           For a query with LIMIT, this value will typically
                           be larger than the function return value.
  @param [out] longest_key The largest single key found, in bytes.

  @note
    Basic idea:
    @verbatim
     while (get_next_sortkey())
     {
       if (using priority queue)
         push sort key into queue
       else
       {
         try to put sort key into buffer;
         if (no free space in sort buffer)
         {
           do {
             allocate new, larger buffer;
             retry putting sort key into buffer;
           } until (record fits or no space for new buffer)
           if (no space for new buffer)
           {
             sort record pointers (all buffers);
             dump sorted sequence to 'tempfile';
             dump Merge_chunk describing sequence location into 'chunk_file';
           }
         }
         if (key was packed)
           tell sort buffer the actual number of bytes used;
       }
     }
     if (buffer has some elements && dumped at least once)
       sort-dump-dump as above;
     else
       don't sort, leave sort buffer to be sorted by caller.
    @endverbatim

  @returns
    Number of records written on success.
  @returns
    HA_POS_ERROR on error.
*/

static ha_rows read_all_rows(
    THD *thd, Sort_param *param, QEP_TAB *qep_tab, Filesort_info *fs_info,
    IO_CACHE *chunk_file, IO_CACHE *tempfile,
    Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare_queue_key> *pq,
    RowIterator *source_iterator, ha_rows *found_rows, size_t *longest_key) {
  /*
    Set up an error handler for filesort. It is automatically pushed
    onto the internal error handler stack upon creation, and will be
    popped off the stack automatically when the handler goes out of
    scope.
  */
  Filesort_error_handler error_handler(thd);

  DBUG_TRACE;

  int error = 0;
  TABLE *sort_form = param->sort_form;
  handler *file = sort_form->file;
  *found_rows = 0;
  size_t longest_key_so_far = 0;
  uchar *ref_pos = &file->ref[0];

  // Now modify the read bitmaps, so that we are sure to get the rows
  // that we need for the sort (ie., the fields to sort on) as well as
  // the actual fields we want to return. We need to do this after Init()
  // has run, as Init() may want to set its own bitmaps and we don't want
  // it to overwrite ours. This is fairly ugly, though; we could end up
  // setting fields that the access method doesn't actually need (e.g.
  // if we set a condition that the access method can satisfy using an
  // index only), and in theory also clear fields it _would_ need, although
  // the latter should never happen in practice. A better solution would
  // involve communicating which extra fields we need down to the
  // RowIterator, instead of just overwriting the read set.

  /* Remember original bitmaps */
  MY_BITMAP *save_read_set = sort_form->read_set;
  MY_BITMAP *save_write_set = sort_form->write_set;
  /*
    Set up temporary column read map for columns used by sort and verify
    it's not used
  */
  DBUG_ASSERT(sort_form->tmp_set.n_bits == 0 ||
              bitmap_is_clear_all(&sort_form->tmp_set));

  // Temporary set for register_used_fields and mark_field_in_map()
  sort_form->read_set = &sort_form->tmp_set;
  // Include fields used for sorting in the read_set.
  register_used_fields(param);

  // Include fields used by conditions in the read_set.
  if (qep_tab->condition()) {
    Mark_field mf(sort_form, MARK_COLUMNS_TEMP);
    qep_tab->condition()->walk(&Item::mark_field_in_map,
                               enum_walk::SUBQUERY_POSTFIX, (uchar *)&mf);
  }
  if (qep_tab->having) {
    Mark_field mf(sort_form, MARK_COLUMNS_TEMP);
    qep_tab->having->walk(&Item::mark_field_in_map, enum_walk::SUBQUERY_POSTFIX,
                          (uchar *)&mf);
  }
  // Include fields used by pushed conditions in the read_set.
  if (qep_tab->table()->file->pushed_idx_cond) {
    Mark_field mf(sort_form, MARK_COLUMNS_TEMP);
    qep_tab->table()->file->pushed_idx_cond->walk(
        &Item::mark_field_in_map, enum_walk::SUBQUERY_POSTFIX, (uchar *)&mf);
  }
  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);

  DEBUG_SYNC(thd, "after_index_merge_phase1");
  ha_rows num_total_records = 0, num_records_this_chunk = 0;
  uint num_written_chunks = 0;
  if (pq == nullptr) {
    fs_info->reset();
    fs_info->clear_peak_memory_used();
  }

  source_iterator->StartPSIBatchMode();
  for (;;) {
    DBUG_EXECUTE_IF("bug19656296", DBUG_SET("+d,ha_rnd_next_deadlock"););
    if ((error = source_iterator->Read())) {
      break;
    }
    // Note where we are, for the case where we are not using addon fields.
    if (!param->using_addon_fields()) {
      file->position(sort_form->record[0]);
    }
    DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, true););

    if (thd->killed) {
      DBUG_PRINT("info", ("Sort killed by user"));
      num_total_records = HA_POS_ERROR;
      goto cleanup;
    }

    ++(*found_rows);
    num_total_records++;
    if (pq)
      pq->push(ref_pos);
    else {
      size_t key_length;
      bool out_of_mem =
          alloc_and_make_sortkey(param, fs_info, ref_pos, &key_length);
      if (out_of_mem) {
        // Out of room, so flush chunk to disk (if there's anything to flush).
        if (num_records_this_chunk > 0) {
          if (write_keys(param, fs_info, num_records_this_chunk, chunk_file,
                         tempfile)) {
            num_total_records = HA_POS_ERROR;
            goto cleanup;
          }
          num_records_this_chunk = 0;
          num_written_chunks++;
          fs_info->reset();

          // Now we should have room for a new row.
          out_of_mem =
              alloc_and_make_sortkey(param, fs_info, ref_pos, &key_length);
        }

        // If we're still out of memory after flushing to disk, give up.
        if (out_of_mem) {
          my_error(ER_OUT_OF_SORTMEMORY, ME_FATALERROR);
          LogErr(ERROR_LEVEL, ER_SERVER_OUT_OF_SORTMEMORY);
          num_total_records = HA_POS_ERROR;
          goto cleanup;
        }
      }

      longest_key_so_far = max(longest_key_so_far, key_length);
      num_records_this_chunk++;
    }
    /* It does not make sense to read more keys in case of a fatal error */
    if (thd->is_error()) break;
  }

  if (thd->is_error()) {
    num_total_records = HA_POS_ERROR;
    goto cleanup;
  }

  /* Signal we should use original column read and write maps */
  sort_form->column_bitmaps_set(save_read_set, save_write_set);

  DBUG_PRINT("test",
             ("error: %d num_written_chunks: %d", error, num_written_chunks));
  if (error == 1) {
    num_total_records = HA_POS_ERROR;
    goto cleanup;
  }
  if (num_written_chunks != 0 && num_records_this_chunk != 0 &&
      write_keys(param, fs_info, num_records_this_chunk, chunk_file,
                 tempfile)) {
    num_total_records = HA_POS_ERROR;  // purecov: inspected
    goto cleanup;
  }

cleanup:
  source_iterator->EndPSIBatchModeIfStarted();

  // Clear tmp_set so it can be used elsewhere
  bitmap_clear_all(&sort_form->tmp_set);

  DBUG_PRINT("info", ("read_all_rows return %lu", (ulong)num_total_records));

  *longest_key = longest_key_so_far;
  return num_total_records;
} /* read_all_rows */

/**
  @details
  Sort the buffer and write:
  -# the sorted sequence to tempfile
  -# a Merge_chunk describing the sorted sequence position to chunk_file

  @param param      Sort parameters
  @param fs_info    Contains the buffer to be sorted and written.
  @param count      Number of records to write.
  @param chunk_file One 'Merge_chunk' struct will be written into this file.
                    The Merge_chunk::{file_pos, count} will indicate where
                    the sorted data was stored.
  @param tempfile   The sorted sequence will be written into this file.

  @returns
    0 OK
  @returns
    1 Error
*/

static int write_keys(Sort_param *param, Filesort_info *fs_info, uint count,
                      IO_CACHE *chunk_file, IO_CACHE *tempfile) {
  Merge_chunk merge_chunk;
  DBUG_TRACE;

  count = fs_info->sort_buffer(param, count);

  if (!my_b_inited(chunk_file) &&
      open_cached_file(chunk_file, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
                       MYF(MY_WME)))
    return 1;

  if (!my_b_inited(tempfile) &&
      open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
                       MYF(MY_WME)))
    return 1; /* purecov: inspected */

  // Check that we won't have more chunks than we can possibly keep in memory.
  if (my_b_tell(chunk_file) + sizeof(Merge_chunk) > (ulonglong)UINT_MAX)
    return 1; /* purecov: inspected */

  merge_chunk.set_file_position(my_b_tell(tempfile));
  if (static_cast<ha_rows>(count) > param->max_rows) {
    // Write only SELECT LIMIT rows to the file
    count = static_cast<uint>(param->max_rows); /* purecov: inspected */
  }
  merge_chunk.set_rowcount(static_cast<ha_rows>(count));

  for (uint ix = 0; ix < count; ++ix) {
    uchar *record = fs_info->get_sorted_record(ix);
    size_t rec_length = param->get_record_length(record);

    if (my_b_write(tempfile, record, rec_length))
      return 1; /* purecov: inspected */
  }

  if (my_b_write(chunk_file, pointer_cast<uchar *>(&merge_chunk),
                 sizeof(merge_chunk)))
    return 1; /* purecov: inspected */

  return 0;
} /* write_keys */
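
/*
  A sketch of the on-disk state write_keys() builds up (illustrative only):

    tempfile:   [run 0: sorted records][run 1: sorted records]...
    chunk_file: [Merge_chunk 0][Merge_chunk 1]...

  Merge_chunk N stores the file position of run N in tempfile and how many
  records the run holds; the merge phase later reads chunk_file back into
  Filesort_info::merge_chunks and merges the runs into the output.
*/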

#ifdef WORDS_BIGENDIAN
const bool Is_big_endian = true;
#else
const bool Is_big_endian = false;
#endif
static void copy_native_longlong(uchar *to, size_t to_length, longlong val,
                                 bool is_unsigned) {
  copy_integer<Is_big_endian>(to, to_length,
                              static_cast<uchar *>(static_cast<void *>(&val)),
                              sizeof(longlong), is_unsigned);
}
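
/*
  The point of copy_integer() here is to turn a native integer into a key
  that sorts correctly under plain memcmp(): store the bytes big-endian and,
  for signed values, flip the sign bit. A standalone sketch of the same idea
  (illustrative only; copy_integer() also handles truncation to to_length):

    #include <cstdint>

    void make_memcmpable_int64(unsigned char *to, int64_t val,
                               bool is_unsigned) {
      uint64_t u = static_cast<uint64_t>(val);
      if (!is_unsigned) u ^= 0x8000000000000000ULL;  // flip the sign bit
      for (int i = 7; i >= 0; --i) {                 // big-endian byte order
        to[i] = static_cast<unsigned char>(u & 0xff);
        u >>= 8;
      }
    }

  With this encoding, memcmp(a, b, 8) < 0 holds exactly when the original
  value encoded in a is less than the one encoded in b.
*/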

/**
  Make a sort key for the JSON value in an Item.

  This function is called by Sort_param::make_sortkey(). We don't want
  it to be inlined, since that seemed to have a negative impact on
  some performance tests.

  @param[in]     item   The item for which to create a sort key.

  @param[out]    to     Pointer into the buffer to which the sort key should
                        be written. It will point to where the data portion
                        of the key should start.

  @param[out]    null_indicator
                        For nullable items, the NULL indicator byte.
                        (Ignored otherwise.) Should be initialized by the
                        caller to a value that indicates not NULL.

  @param[in]     length The length of the sort key, not including the NULL
                        indicator byte at the beginning of the sort key for
                        nullable items.

  @param[in,out] hash   The hash key of the JSON values in the current row.

  @returns
    length of the key stored
*/
NO_INLINE
static uint make_json_sort_key(Item *item, uchar *to, uchar *null_indicator,
                               size_t length, ulonglong *hash) {
  DBUG_ASSERT(!item->maybe_null || *null_indicator == 1);

  Json_wrapper wr;
  if (item->val_json(&wr)) {
    // An error occurred, no point to continue making the key; set it to null.
    if (item->maybe_null) *null_indicator = 0;
    return 0;
  }

  if (item->null_value) {
    /*
      Got NULL. The sort key should be all zeros. The caller has
      already tentatively set the NULL indicator byte at *null_indicator to
      not-NULL, so we need to clear that byte too.
    */
    if (item->maybe_null) {
      // Don't store anything but null flag.
      *null_indicator = 0;
      return 0;
    }
    /* purecov: begin inspected */
    DBUG_PRINT("warning", ("Got null on something that shouldn't be null"));
    DBUG_ASSERT(false);
    return 0;
    /* purecov: end */
  }

  size_t actual_length = wr.make_sort_key(to, length);
  *hash = wr.make_hash_key(*hash);
  return actual_length;
}

namespace {

/*
  Returns true if writing the given uint8_t would overflow <to> past <to_end>.
  Writes the value and advances <to> otherwise.
*/
inline bool write_uint8_overflows(uint8_t val, uchar *to_end, uchar **to) {
  if (to_end - *to < 1) return true;
  **to = val;
  (*to)++;
  return false;
}

/*
  Returns true if writing <num_bytes> zeros would overflow <to> past <to_end>.
  Writes the zeros and advances <to> otherwise.
*/
inline bool clear_overflows(size_t num_bytes, uchar *to_end, uchar **to) {
  if (static_cast<size_t>(to_end - *to) < num_bytes) return true;
  memset(*to, 0, num_bytes);
  *to += num_bytes;
  return false;
}

/*
  Returns true if advancing <to> by <num_bytes> would put it past <to_end>.
  Advances <to> otherwise (does not write anything to the buffer).
*/
inline bool advance_overflows(size_t num_bytes, uchar *to_end, uchar **to) {
  if (static_cast<size_t>(to_end - *to) < num_bytes) return true;
  *to += num_bytes;
  return false;
}

/*
  Writes a NULL indicator byte (if the field may be NULL), leaves space for a
  varlength prefix (if varlen and not NULL), and then the actual sort key.
  Returns the length of the key, sans NULL indicator byte and varlength prefix,
  or UINT_MAX if the value would not provably fit within the given bounds.
*/
size_t make_sortkey_from_field(Field *field, Nullable<size_t> dst_length,
                               uchar *to, uchar *to_end, bool *maybe_null) {
  bool is_varlen = !dst_length.has_value();

  *maybe_null = field->maybe_null();
  if (field->maybe_null()) {
    if (write_uint8_overflows(field->is_null() ? 0 : 1, to_end, &to))
      return UINT_MAX;
    if (field->is_null()) {
      if (is_varlen) {
        // Don't store anything except the NULL flag.
        return 0;
      }
      if (clear_overflows(dst_length.value(), to_end, &to)) return UINT_MAX;
      return dst_length.value();
    }
  }

  size_t actual_length;
  if (is_varlen) {
    if (advance_overflows(VARLEN_PREFIX, to_end, &to)) return UINT_MAX;
    size_t max_length = to_end - to;
    if (max_length % 2 != 0) {
      // Heed the contract that strnxfrm needs an even number of bytes.
      --max_length;
    }
    actual_length = field->make_sort_key(to, max_length);
    if (actual_length >= max_length) {
      /*
        The sort key either fit perfectly, or overflowed; we can't distinguish
        between the two, so we have to count it as overflow.
      */
      return UINT_MAX;
    }
  } else {
    if (static_cast<size_t>(to_end - to) < dst_length.value()) return UINT_MAX;
    actual_length = field->make_sort_key(to, dst_length.value());
    DBUG_ASSERT(actual_length == dst_length.value());
  }
  return actual_length;
}

/*
  Writes a NULL indicator byte (if the item may be NULL), leaves space for a
  varlength prefix (if varlen and not NULL), and then the actual sort key.
  Returns the length of the key, sans NULL indicator byte and varlength prefix,
  or UINT_MAX if the value would not provably fit within the given bounds.
*/
size_t make_sortkey_from_item(Item *item, Item_result result_type,
                              Nullable<size_t> dst_length, String *tmp_buffer,
                              uchar *to, uchar *to_end, bool *maybe_null,
                              ulonglong *hash) {
  bool is_varlen = !dst_length.has_value();

  uchar *null_indicator = nullptr;
  *maybe_null = item->maybe_null;
  if (item->maybe_null) {
    null_indicator = to;
    /*
      Assume not NULL by default. Will be overwritten if needed.
      Note that we can't check item->null_value at this time,
      because it will only get properly set after a call to val_*().
    */
    if (write_uint8_overflows(1, to_end, &to)) return UINT_MAX;
  }

  if (is_varlen) {
    // Check that there is room for the varlen prefix, and advance past it.
    if (advance_overflows(VARLEN_PREFIX, to_end, &to)) return UINT_MAX;
  } else {
    // Check that there is room for the fixed-size value.
    if (static_cast<size_t>(to_end - to) < dst_length.value()) return UINT_MAX;
  }

  switch (result_type) {
    case STRING_RESULT: {
      if (item->data_type() == MYSQL_TYPE_JSON) {
        DBUG_ASSERT(is_varlen);
        return make_json_sort_key(item, to, null_indicator, to_end - to, hash);
      }

      const CHARSET_INFO *cs = item->collation.collation;

      String *res = item->val_str(tmp_buffer);
      if (res == nullptr)  // Value is NULL.
      {
        DBUG_ASSERT(item->maybe_null);
        if (is_varlen) {
          // Don't store anything except the NULL flag.
          return 0;
        }
        *null_indicator = 0;
        memset(to, 0, dst_length.value());
        return dst_length.value();
      }

      uint src_length = static_cast<uint>(res->length());
      const char *from = res->ptr();

      size_t actual_length;
      if (is_varlen) {
        size_t max_length = to_end - to;
        if (max_length % 2 != 0) {
          // Heed the contract that strnxfrm needs an even number of bytes.
          --max_length;
        }
        actual_length = cs->coll->strnxfrm(
            cs, to, max_length, item->max_char_length(),
            pointer_cast<const uchar *>(from), src_length, 0);
        if (actual_length == max_length) {
          /*
            The sort key either fit perfectly, or overflowed; we can't
            distinguish between the two, so we have to count it as overflow.
          */
          return UINT_MAX;
        }
      } else {
        actual_length = cs->coll->strnxfrm(
            cs, to, dst_length.value(), item->max_char_length(),
            pointer_cast<const uchar *>(from), src_length,
            MY_STRXFRM_PAD_TO_MAXLEN);
        DBUG_ASSERT(actual_length == dst_length.value());
      }
      DBUG_ASSERT(to + actual_length <= to_end);
      return actual_length;
    }
    case INT_RESULT: {
      DBUG_ASSERT(!is_varlen);
      longlong value = item->data_type() == MYSQL_TYPE_TIME
                           ? item->val_time_temporal()
                           : item->is_temporal_with_date()
                                 ? item->val_date_temporal()
                                 : item->val_int();
      /*
        Note: item->null_value can't be trusted alone here; there are cases
        (for the DATE data type in particular) where we can have
        item->null_value set without maybe_null being set! This really should
        be cleaned up, but until that happens, we need to have a more
        conservative check.
      */
      if (item->maybe_null && item->null_value) {
        *null_indicator = 0;
        memset(to, 0, dst_length.value());
      } else
        copy_native_longlong(to, dst_length.value(), value,
                             item->unsigned_flag);
      return dst_length.value();
    }
    case DECIMAL_RESULT: {
      DBUG_ASSERT(!is_varlen);
      my_decimal dec_buf, *dec_val = item->val_decimal(&dec_buf);
      /*
        Note: item->null_value can't be trusted alone here; there are cases
        where we can have item->null_value set without maybe_null being set!
        (There are also cases where dec_val can return non-nullptr even in
        the case of a NULL result.) This really should be cleaned up, but until
        that happens, we need to have a more conservative check.
      */
      if (item->maybe_null && item->null_value) {
        *null_indicator = 0;
        memset(to, 0, dst_length.value());
      } else if (dst_length.value() < DECIMAL_MAX_FIELD_SIZE) {
        uchar buf[DECIMAL_MAX_FIELD_SIZE];
        my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, buf,
                          item->max_length - (item->decimals ? 1 : 0),
                          item->decimals);
        memcpy(to, buf, dst_length.value());
      } else {
        my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, to,
                          item->max_length - (item->decimals ? 1 : 0),
                          item->decimals);
      }
      return dst_length.value();
    }
    case REAL_RESULT: {
      DBUG_ASSERT(!is_varlen);
      double value = item->val_real();
      if (item->null_value) {
        DBUG_ASSERT(item->maybe_null);
        *null_indicator = 0;
        memset(to, 0, dst_length.value());
      } else if (dst_length.value() < sizeof(double)) {
        uchar buf[sizeof(double)];
        change_double_for_sort(value, buf);
        memcpy(to, buf, dst_length.value());
      } else {
        change_double_for_sort(value, to);
      }
      return dst_length.value();
    }
    case ROW_RESULT:
    default:
      // This case should never be chosen.
      DBUG_ASSERT(0);
      return dst_length.value();
  }
}

}  // namespace

uint Sort_param::make_sortkey(Bounds_checked_array<uchar> dst,
                              const uchar *ref_pos) {
  uchar *to = dst.array();
  uchar *to_end = dst.array() + dst.size();
  uchar *orig_to = to;
  const st_sort_field *sort_field;
  ulonglong hash = 0;

  if (using_varlen_keys()) {
    to += size_of_varlength_field;
    if (to >= to_end) return UINT_MAX;
  }
  for (sort_field = local_sortorder.begin();
       sort_field != local_sortorder.end(); sort_field++) {
    if (to >= to_end ||
        (!sort_field->is_varlen &&
         static_cast<size_t>(to_end - to) < sort_field->length)) {
      return UINT_MAX;
    }

    bool maybe_null;
    Nullable<size_t> dst_length;
    if (!sort_field->is_varlen) dst_length = sort_field->length;
    uint actual_length;
    if (sort_field->field) {
      Field *field = sort_field->field;
      DBUG_ASSERT(sort_field->field_type == field->type());

      actual_length =
          make_sortkey_from_field(field, dst_length, to, to_end, &maybe_null);

      if (sort_field->field_type == MYSQL_TYPE_JSON) {
        DBUG_ASSERT(use_hash);
        unique_hash(field, &hash);
      }
    } else {  // Item
      Item *item = sort_field->item;
      DBUG_ASSERT(sort_field->field_type == item->data_type());

      actual_length =
          make_sortkey_from_item(item, sort_field->result_type, dst_length,
                                 &tmp_buffer, to, to_end, &maybe_null, &hash);
    }

    if (actual_length == UINT_MAX) {
      // Overflow.
      return UINT_MAX;
    }

    /*
      Now advance past the key that was just written, reversing the parts that
      we need to reverse.
    */

    bool is_null = maybe_null && *to == 0;
    if (maybe_null) {
      DBUG_ASSERT(*to == 0 || *to == 1);
      if (sort_field->reverse && is_null) {
        *to = 0xff;
      }
      ++to;
    }

    // Fill out the varlen prefix if it exists.
    if (sort_field->is_varlen && !is_null) {
      int4store(to, actual_length + VARLEN_PREFIX);
      to += VARLEN_PREFIX;
    }

    // Reverse the key if needed.
    if (sort_field->reverse) {
      while (actual_length--) {
        *to = (uchar)(~*to);
        to++;
      }
    } else {
      to += actual_length;
    }
  }

  if (use_hash) {
    if (to_end - to < 8) return UINT_MAX;
    int8store(to, hash);
    to += 8;
  }

  if (using_varlen_keys()) {
    // Store the length of the record as a whole.
    Sort_param::store_varlen_key_length(orig_to,
                                        static_cast<uint>(to - orig_to));
  }

  if (using_addon_fields()) {
    /*
      Save field values appended to sorted fields.
      First null bit indicators are appended then field values follow.
    */
    uchar *nulls = to;
    uchar *p_len = to;

    Addon_fields_array::const_iterator addonf = addon_fields->begin();
    if (clear_overflows(addonf->offset, to_end, &to)) return UINT_MAX;
    if (addon_fields->using_packed_addons()) {
      for (; addonf != addon_fields->end(); ++addonf) {
        Field *field = addonf->field;
        if (addonf->null_bit && field->is_null()) {
          nulls[addonf->null_offset] |= addonf->null_bit;
        } else {
          to = field->pack(to, field->ptr, to_end - to,
                           field->table->s->db_low_byte_first);
          if (to >= to_end) return UINT_MAX;
        }
      }
      Addon_fields::store_addon_length(p_len, to - p_len);
    } else {
      for (; addonf != addon_fields->end(); ++addonf) {
        Field *field = addonf->field;
        if (static_cast<size_t>(to_end - to) < addonf->max_length) {
          return UINT_MAX;
        }
        if (addonf->null_bit && field->is_null()) {
          nulls[addonf->null_offset] |= addonf->null_bit;
        } else {
          uchar *ptr MY_ATTRIBUTE((unused)) = field->pack(
              to, field->ptr, to_end - to, field->table->s->db_low_byte_first);
          DBUG_ASSERT(ptr <= to + addonf->max_length);
        }
        to += addonf->max_length;
      }
    }
    DBUG_PRINT("info", ("make_sortkey %p %u", orig_to,
                        static_cast<unsigned>(to - p_len)));
  } else {
    if (static_cast<size_t>(to_end - to) < ref_length) {
      return UINT_MAX;
    }

    /* Save filepos last */
    memcpy(to, ref_pos, ref_length);
    to += ref_length;
  }
  return to - orig_to;
}

/*
  Register fields used by sorting in the sorted table's read set
*/

static void register_used_fields(Sort_param *param) {
  Bounds_checked_array<st_sort_field>::const_iterator sort_field;
  TABLE *table = param->sort_form;
  MY_BITMAP *bitmap = table->read_set;
  Mark_field mf(table, MARK_COLUMNS_TEMP);

  for (sort_field = param->local_sortorder.begin();
       sort_field != param->local_sortorder.end(); sort_field++) {
    Field *field;
    if ((field = sort_field->field)) {
      if (field->table == table) {
        bitmap_set_bit(bitmap, field->field_index);
        if (field->is_virtual_gcol()) table->mark_gcol_in_maps(field);
      }
    } else {  // Item
      sort_field->item->walk(&Item::mark_field_in_map,
                             enum_walk::SUBQUERY_POSTFIX, (uchar *)&mf);
    }
  }

  if (param->using_addon_fields()) {
    Addon_fields_array::const_iterator addonf = param->addon_fields->begin();
    for (; addonf != param->addon_fields->end(); ++addonf) {
      Field *field = addonf->field;
      bitmap_set_bit(bitmap, field->field_index);
      if (field->is_virtual_gcol()) table->mark_gcol_in_maps(field);
    }
  } else {
    /* Save filepos last */
    table->prepare_for_position();
  }
}

/**
  This function is used only if the entire result set fits in memory.

  For addon fields, we keep the result in the filesort buffer.
  This saves us a lot of memcpy calls.

  For row references, we copy the final sorted result into a buffer,
  but we do not copy the actual sort-keys, as they are no longer needed.
  We could have kept the result in the sort buffer here as well,
  but the new buffer - containing only row references - is probably a
  lot smaller.

  The result data will be unpacked by SortBufferIterator
  or SortBufferIndirectIterator.

  Note that SortBufferIterator does not have access to a Sort_param.
  It does however have access to a Filesort_info, which knows whether
  we have variable sized keys or not.
  TODO: consider templatizing SortBufferIterator on is_varlen or not.

  @param [in] param Sort parameters.
  @param count Number of records
  @param [in,out] table_sort Information used by SortBufferIterator /
    SortBufferIndirectIterator
  @param [out] sort_result Where to store the actual result
*/
static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort,
                       Sort_result *sort_result) {
  uchar *to;
  DBUG_TRACE;

  table_sort->set_sort_length(param->max_compare_length(),
                              param->using_varlen_keys());

  count = table_sort->sort_buffer(param, count);
  sort_result->found_records = count;

  if (param->using_addon_fields()) {
    sort_result->sorted_result_in_fsbuf = true;
    return false;
  }

  sort_result->sorted_result_in_fsbuf = false;
  const size_t buf_size = param->fixed_res_length * count;
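
  /*
    Illustrative sizing (numbers hypothetical, not from the server): for one
    million rows with fixed_res_length = 8, buf_size is about 8 MB, typically
    much smaller than the sort buffer, which also holds the full sort keys.
  */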

  DBUG_ASSERT(sort_result->sorted_result == NULL);
  sort_result->sorted_result.reset(static_cast<uchar *>(my_malloc(
      key_memory_Filesort_info_record_pointers, buf_size, MYF(MY_WME))));
  if (!(to = sort_result->sorted_result.get()))
    return true; /* purecov: inspected */
  sort_result->sorted_result_end = sort_result->sorted_result.get() + buf_size;

  uint res_length = param->fixed_res_length;
  for (uint ix = 0; ix < count; ++ix) {
    uchar *record = table_sort->get_sorted_record(ix);
    uchar *start_of_payload = param->get_start_of_payload(record);
    memcpy(to, start_of_payload, res_length);
    to += res_length;
  }
  return false;
}

/**
  Test whether a priority queue is worth using to get the top elements of an
  ordered result set. If it is, allocate a buffer for the required number of
  records.

  @param trace Current trace context.
  @param param Sort parameters.
  @param filesort_info Filesort information.
  @param num_rows Estimate of number of rows in source record set.
  @param memory_available Memory available for sorting.

  DESCRIPTION
    Given a query like this:
      SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
    This function tests whether a priority queue can be used to keep
    the result (i.e., there is enough memory to store @<max_rows@> rows).

  @returns
    true - if it is OK to use PQ
  @returns
    false - if PQ is not applicable, or there is not enough memory.
*/

bool check_if_pq_applicable(Opt_trace_context *trace, Sort_param *param,
                            Filesort_info *filesort_info, ha_rows num_rows,
                            ulong memory_available) {
  DBUG_TRACE;

  /*
    How much slower priority-queue sort is than qsort.
    Measurements (see unit test) indicate that PQ is roughly 3 times slower.
  */
  const double PQ_slowness = 3.0;
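  /*
    Illustrative example (numbers hypothetical): with num_rows = 1,000,000
    and LIMIT 100, the PQ path below is chosen (assuming the whole set fits
    in memory), since 100 < 1,000,000 / 3; with LIMIT 500,000 it is not,
    since fully sorting the in-memory set is then estimated to be cheaper.
  */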

  Opt_trace_object trace_filesort(trace,
                                  "filesort_priority_queue_optimization");
  if (param->max_rows == HA_POS_ERROR) {
    trace_filesort.add("usable", false)
        .add_alnum("cause", "not applicable (no LIMIT)");
    return false;
  }

  if (param->m_remove_duplicates) {
    trace_filesort.add("usable", false)
        .add_alnum("cause", "duplicate removal not supported yet");
    return false;
  }

  trace_filesort.add("limit", param->max_rows);

  if (param->max_rows + 2 >= UINT_MAX) {
    trace_filesort.add("usable", false).add_alnum("cause", "limit too large");
    return false;
  }
  if (param->max_record_length() >= 0xFFFFFFFFu) {
    trace_filesort.add("usable", false)
        .add_alnum("cause", "contains records of unbounded length");
    return false;
  }

  ulong num_available_keys =
      memory_available / (param->max_record_length() + sizeof(char *));
  // We need 1 extra record in the buffer when using PQ.
  param->max_rows_per_buffer = (uint)param->max_rows + 1;

  if (num_rows < num_available_keys) {
    // The whole source set fits into memory.
    if (param->max_rows < num_rows / PQ_slowness) {
      filesort_info->set_max_size(memory_available, param->max_record_length());
      trace_filesort.add("chosen", true);
      return filesort_info->max_size_in_bytes() > 0;
    } else {
      // PQ will be slower.
      trace_filesort.add("chosen", false).add_alnum("cause", "sort_is_cheaper");
      return false;
    }
  }

  // Do we have space for LIMIT rows in memory?
  if (param->max_rows_per_buffer < num_available_keys) {
    filesort_info->set_max_size(memory_available, param->max_record_length());
    trace_filesort.add("chosen", true);
    return filesort_info->max_size_in_bytes() > 0;
  }

  return false;
}

/**
  Read from a disk file into the merge chunk's buffer. We generally read as
  many complete rows as we can, except when bounded by max_keys() or
  rowcount(). Incomplete rows will be left in the file.

  @returns
    Number of bytes read, or (uint)-1 if something went wrong.
*/
static uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
                           Sort_param *param) {
  DBUG_TRACE;
  uint rec_length = param->max_record_length();
  ha_rows count;

  const bool packed_addon_fields = param->using_packed_addons();
  const bool using_varlen_keys = param->using_varlen_keys();

  if (merge_chunk->rowcount() > 0) {
    size_t bytes_to_read;
    if (packed_addon_fields || using_varlen_keys) {
      count = merge_chunk->rowcount();
      bytes_to_read = min(merge_chunk->buffer_size(),
                          static_cast<size_t>(fromfile->end_of_file -
                                              merge_chunk->file_position()));
    } else {
      count = min(merge_chunk->max_keys(), merge_chunk->rowcount());
      bytes_to_read = rec_length * static_cast<size_t>(count);
      if (count == 0) {
        // Not even room for the first row.
        my_error(ER_OUT_OF_SORTMEMORY, ME_FATALERROR);
        LogErr(ERROR_LEVEL, ER_SERVER_OUT_OF_SORTMEMORY);
        return (uint)-1;
      }
    }

    DBUG_PRINT("info",
               ("read_to_buffer %p at file_pos %llu bytes %llu", merge_chunk,
                static_cast<ulonglong>(merge_chunk->file_position()),
                static_cast<ulonglong>(bytes_to_read)));
    if (mysql_file_pread(fromfile->file, merge_chunk->buffer_start(),
                         bytes_to_read, merge_chunk->file_position(), MYF_RW))
      return (uint)-1; /* purecov: inspected */

    size_t num_bytes_read;
    if (packed_addon_fields || using_varlen_keys) {
      /*
        The last record read is most likely not complete here.
        We need to loop through all the records, reading the length fields,
        and then "chop off" the final incomplete record.
      */
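      /*
        Illustrative example (numbers hypothetical): if 1000 bytes were read
        and the rows on disk are 400, 400 and 400 bytes long, the loop below
        keeps the first two rows and reports 800 bytes of complete records;
        the third row is re-read from its file position on the next call.
      */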
      uchar *record = merge_chunk->buffer_start();
      uint ix = 0;
      for (; ix < count; ++ix) {
        if (using_varlen_keys &&
            (record + Sort_param::size_of_varlength_field) >=
                merge_chunk->buffer_end())
          break;  // Incomplete record.

        uchar *start_of_payload = param->get_start_of_payload(record);
        if (start_of_payload >= merge_chunk->buffer_end())
          break;  // Incomplete record.

        if (packed_addon_fields &&
            start_of_payload + Addon_fields::size_of_length_field >=
                merge_chunk->buffer_end())
          break;  // Incomplete record.

        const uint res_length =
            packed_addon_fields
                ? Addon_fields::read_addon_length(start_of_payload)
                : param->fixed_res_length;

        if (start_of_payload + res_length >= merge_chunk->buffer_end())
          break;  // Incomplete record.

        DBUG_ASSERT(res_length > 0);
        record = start_of_payload + res_length;
      }
      if (ix == 0) {
        // Not even room for the first row.
        my_error(ER_OUT_OF_SORTMEMORY, ME_FATALERROR);
        LogErr(ERROR_LEVEL, ER_SERVER_OUT_OF_SORTMEMORY);
        return (uint)-1;
      }
      count = ix;
      num_bytes_read = record - merge_chunk->buffer_start();
      DBUG_PRINT("info", ("read %llu bytes of complete records",
                          static_cast<ulonglong>(num_bytes_read)));
    } else
      num_bytes_read = bytes_to_read;

    merge_chunk->init_current_key();
    merge_chunk->advance_file_position(num_bytes_read);
    merge_chunk->decrement_rowcount(count);
    merge_chunk->set_mem_count(count);
    return num_bytes_read;
  }

  return 0;
} /* read_to_buffer */

namespace {

/**
  This struct is used for merging chunks for filesort().
  For filesort() with fixed-size keys, we use memcmp to compare rows.
  For variable-length keys, we use cmp_varlen_keys to compare rows.
*/
struct Merge_chunk_greater {
  size_t m_len;
  Sort_param *m_param;

  // CTOR for filesort() with fixed-size keys
  explicit Merge_chunk_greater(size_t len) : m_len(len), m_param(nullptr) {}

  // CTOR for filesort() with varlen keys
  explicit Merge_chunk_greater(Sort_param *param) : m_len(0), m_param(param) {}

  bool operator()(Merge_chunk *a, Merge_chunk *b) const {
    return key_is_greater_than(a->current_key(), b->current_key());
  }

  bool key_is_greater_than(uchar *key1, uchar *key2) const {
    // Fixed len keys
    if (m_len) return memcmp(key1, key2, m_len) > 0;

    if (m_param)
      return !cmp_varlen_keys(m_param->local_sortorder, m_param->use_hash, key1,
                              key2);

    // We can actually have a zero-length sort key for filesort().
    return false;
  }
};

}  // namespace
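
/*
  merge_buffers() below is a standard k-way merge driven by a priority queue
  of chunks, ordered by each chunk's current (smallest unconsumed) key. A
  minimal sketch of the idea, with simplified, hypothetical types
  (illustrative only; the real code refills chunk buffers from disk and uses
  update_top() instead of pop + push):

    std::priority_queue<Chunk *, std::vector<Chunk *>, ChunkGreater> pq;
    for (Chunk *c : chunks) pq.push(c);  // each chunk is internally sorted
    while (!pq.empty()) {
      Chunk *c = pq.top();
      emit(c->current_row());  // smallest remaining row across all chunks
      pq.pop();
      if (c->advance()) pq.push(c);  // reinsert with its new head row
    }
*/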

/**
  Merge buffers to one buffer.

  @param thd Thread handler
  @param param Sort parameter
  @param from_file File with source data (Merge_chunks point to this file)
  @param to_file File to write the sorted result data.
  @param sort_buffer Buffer for data to store up to MERGEBUFF2 sort keys.
  @param [out] last_chunk Store here Merge_chunk describing data written to
    to_file.
  @param chunk_array Array of chunks to merge.
  @param include_keys If true, write both the keys and the addons / row
    positions. If false, the keys will be skipped (useful only for the output
    of the final merge, where we don't need to compare rows further).

  @returns
    0 OK
  @returns
    other error
*/
static int merge_buffers(THD *thd, Sort_param *param, IO_CACHE *from_file,
                         IO_CACHE *to_file, Sort_buffer sort_buffer,
                         Merge_chunk *last_chunk, Merge_chunk_array chunk_array,
                         bool include_keys) {
  int error = 0;
  ha_rows max_rows, org_max_rows;
  uchar *strpos;
  Merge_chunk *merge_chunk;
  DBUG_TRACE;

  thd->inc_status_sort_merge_passes();

  my_off_t to_start_filepos = my_b_tell(to_file);
  strpos = sort_buffer.array();
  org_max_rows = max_rows = param->max_rows;

  // Only relevant for fixed-length rows.
  ha_rows maxcount = param->max_rows_per_buffer / chunk_array.size();

  // If we don't use addon fields, we'll have the record position appended to
  // the end of each record. This disturbs our equality comparisons, so we'll
  // have to remove it. (Removing it also makes the comparisons ever so
  // slightly cheaper.)
  size_t key_len = param->max_compare_length();
  if (!param->using_addon_fields()) {
    key_len -= param->ref_length;
  }

  Merge_chunk_greater mcl = param->using_varlen_keys()
                                ? Merge_chunk_greater(param)
                                : Merge_chunk_greater(key_len);
  Priority_queue<Merge_chunk *,
                 std::vector<Merge_chunk *, Malloc_allocator<Merge_chunk *>>,
                 Merge_chunk_greater>
      queue(mcl, Malloc_allocator<Merge_chunk *>(key_memory_Filesort_info_merge));

  if (queue.reserve(chunk_array.size())) return 1;

  for (merge_chunk = chunk_array.begin(); merge_chunk != chunk_array.end();
       merge_chunk++) {
    const size_t chunk_sz = sort_buffer.size() / chunk_array.size();
    merge_chunk->set_buffer(strpos, strpos + chunk_sz);

    merge_chunk->set_max_keys(maxcount);
    strpos += chunk_sz;
    error = static_cast<int>(read_to_buffer(from_file, merge_chunk, param));

    if (error == -1) return error; /* purecov: inspected */
    // If less data in buffers than expected
    merge_chunk->set_max_keys(merge_chunk->mem_count());
    (void)queue.push(merge_chunk);
  }

  bool seen_any_records = false;  // Used for deduplication only.
  while (queue.size() > 1) {
    if (thd->killed) {
      return 1; /* purecov: inspected */
    }
    for (;;) {
      merge_chunk = queue.top();
      unsigned row_length, payload_length;
      {
        param->get_rec_and_res_len(merge_chunk->current_key(), &row_length,
                                   &payload_length);
        const uint bytes_to_write = include_keys ? row_length : payload_length;
        unsigned offset = include_keys ? 0 : (row_length - payload_length);

        bool is_duplicate = false;
        if (param->m_remove_duplicates) {
          if (seen_any_records &&
              !mcl.key_is_greater_than(merge_chunk->current_key(),
                                       param->m_last_key_seen)) {
            is_duplicate = true;
          } else {
            seen_any_records = true;
            memcpy(param->m_last_key_seen, merge_chunk->current_key(),
                   row_length - payload_length);
          }
        }

        if (!is_duplicate) {
          if (my_b_write(to_file, merge_chunk->current_key() + offset,
                         bytes_to_write)) {
            return 1; /* purecov: inspected */
          }
          if (!--max_rows) {
            error = 0; /* purecov: inspected */
            goto end;  /* purecov: inspected */
          }
        }
      }

      merge_chunk->advance_current_key(row_length);
      merge_chunk->decrement_mem_count();
      if (0 == merge_chunk->mem_count()) {
        // No more records in memory for this chunk. Read more, and if there's
        // none, take it out of the queue.
        if (!(error = (int)read_to_buffer(from_file, merge_chunk, param))) {
          queue.pop();
          reuse_freed_buff(merge_chunk, &queue);
          break; /* One buffer has been removed */
        } else if (error == -1)
          return error; /* purecov: inspected */
      }
      /*
        The Merge_chunk at the queue's top had one of its keys consumed, thus
        it may now rank differently in the comparison order of the queue, so:
      */
      queue.update_top();
    }
  }

  // Only one chunk left; read all of its records.
  merge_chunk = queue.top();
  merge_chunk->set_buffer(sort_buffer.array(),
                          sort_buffer.array() + sort_buffer.size());
  merge_chunk->set_max_keys(param->max_rows_per_buffer);

  do {
    if (merge_chunk->mem_count() > max_rows) {
      merge_chunk->set_mem_count(max_rows); /* Don't write too many records */
      merge_chunk->set_rowcount(0);         /* Don't read more */
    }
    max_rows -= merge_chunk->mem_count();

    for (uint ix = 0; ix < merge_chunk->mem_count(); ++ix) {
      unsigned row_length, payload_length;
      param->get_rec_and_res_len(merge_chunk->current_key(), &row_length,
                                 &payload_length);
      const uint bytes_to_write = include_keys ? row_length : payload_length;
      unsigned offset = include_keys ? 0 : (row_length - payload_length);

      // Since there's only one chunk left, and it does not contain duplicates
      // internally, we only need to check for duplicates on the first
      // iteration of the loop.
      bool is_duplicate =
          (ix == 0 && param->m_remove_duplicates && seen_any_records &&
           !mcl.key_is_greater_than(merge_chunk->current_key(),
                                    param->m_last_key_seen));
      if (!is_duplicate) {
        if (my_b_write(to_file, merge_chunk->current_key() + offset,
                       bytes_to_write)) {
          return 1; /* purecov: inspected */
        }
      }
      merge_chunk->advance_current_key(row_length);
    }
  } while ((error = (int)read_to_buffer(from_file, merge_chunk, param)) != -1 &&
           error != 0);

end:
  last_chunk->set_rowcount(min(org_max_rows - max_rows, param->max_rows));
  last_chunk->set_file_position(to_start_filepos);

  return error;
} /* merge_buffers */

/* Do a merge to output-file (save only positions) */

static int merge_index(THD *thd, Sort_param *param, Sort_buffer sort_buffer,
                       Merge_chunk_array chunk_array, IO_CACHE *tempfile,
                       IO_CACHE *outfile) {
  DBUG_TRACE;
  if (merge_buffers(thd,
                    param,                // param
                    tempfile,             // from_file
                    outfile,              // to_file
                    sort_buffer,          // sort_buffer
                    chunk_array.begin(),  // last_chunk [out]
                    chunk_array,
                    false))  // include_keys
    return 1; /* purecov: inspected */
  return 0;
} /* merge_index */

/**
  Calculate length of sort key.

  @param thd Thread handler
  @param sortorder Order of items to sort
  @param s_length Number of items to sort

  @note
    sortorder->length is updated for each sort item.

  @return
    Total length of sort buffer in bytes
*/

uint sortlength(THD *thd, st_sort_field *sortorder, uint s_length) {
  uint total_length = 0;

  // Heed the contract that strnxfrm() needs an even number of bytes.
  const uint max_sort_length_even = (thd->variables.max_sort_length + 1) & ~1;
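  // The (x + 1) & ~1 expression rounds an odd max_sort_length up to the next
  // even number and leaves even values unchanged.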

  for (; s_length--; sortorder++) {
    bool is_string_type = false;
    if (sortorder->field) {
      const Field *field = sortorder->field;
      const CHARSET_INFO *cs = field->sort_charset();
      sortorder->length = field->sort_length();
      sortorder->is_varlen = field->sort_key_is_varlen();

      // How many bytes do we need (including sort weights) for strnxfrm()?
      if (sortorder->length < (10 << 20)) {  // 10 MB.
        sortorder->length = cs->coll->strnxfrmlen(cs, sortorder->length);
      } else {
        /*
          If over 10 MB, just set the length as effectively infinite, so we
          don't get overflows in strnxfrmlen().
        */
        sortorder->length = 0xFFFFFFFFu;
      }

      sortorder->maybe_null = field->maybe_null();
      sortorder->field_type = field->type();
      is_string_type =
          field->result_type() == STRING_RESULT && !field->is_temporal();
    } else {
      const Item *item = sortorder->item;
      sortorder->result_type = item->result_type();
      sortorder->field_type = item->data_type();
      if (sortorder->field_type == MYSQL_TYPE_JSON)
        sortorder->is_varlen = true;
      else
        sortorder->is_varlen = false;
      if (item->is_temporal()) sortorder->result_type = INT_RESULT;
      switch (sortorder->result_type) {
        case STRING_RESULT: {
          const CHARSET_INFO *cs = item->collation.collation;
          sortorder->length = item->max_length;

          if (cs->pad_attribute == NO_PAD) {
            sortorder->is_varlen = true;
          }

          if (sortorder->length < (10 << 20)) {  // 10 MB.
            // How many bytes do we need (including sort weights) for
            // strnxfrm()?
            sortorder->length = cs->coll->strnxfrmlen(cs, sortorder->length);
          } else {
            /*
              If over 10 MB, just set the length as effectively infinite, so
              we don't get overflows in strnxfrmlen().
            */
            sortorder->length = 0xFFFFFFFFu;
          }
          is_string_type = true;
          break;
        }
        case INT_RESULT:
#if SIZEOF_LONG_LONG > 4
          sortorder->length = 8;  // Size of internal longlong
#else
          sortorder->length = 4;
#endif
          break;
        case DECIMAL_RESULT:
          sortorder->length = my_decimal_get_binary_size(
              item->max_length - (item->decimals ? 1 : 0), item->decimals);
          break;
        case REAL_RESULT:
          sortorder->length = sizeof(double);
          break;
        case ROW_RESULT:
        default:
          // This case should never be chosen.
          DBUG_ASSERT(0);
          break;
      }
      sortorder->maybe_null = item->maybe_null;
    }
    if (!sortorder->is_varlen && is_string_type) {
      /*
        We would love to never have to care about max_sort_length anymore,
        but that would make it impossible for us to sort blobs (TEXT) with
        PAD SPACE collations, since those are not variable-length (the padding
        is serialized as part of the sort key) and thus require infinite space.
        Thus, as long as we need to sort such fields by storing their sort
        keys, we need to heed max_sort_length for such fields.
      */
      sortorder->length = std::min(sortorder->length, max_sort_length_even);
    }

    if (sortorder->maybe_null)
      AddWithSaturate(1u, &total_length);  // Place for NULL marker
    if (sortorder->is_varlen)
      AddWithSaturate(VARLEN_PREFIX, &sortorder->length);
    AddWithSaturate(sortorder->length, &total_length);
  }
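
  /*
    Illustrative example (not normative): per the cases above, a nullable
    integer-typed item plus a non-nullable double-typed item contribute
    1 (NULL marker) + 8 + 8 = 17 bytes to each sort key on a 64-bit build.
  */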
  sortorder->field = NULL;  // end marker
  DBUG_PRINT("info", ("sort_length: %u", total_length));
  return total_length;
}

/**
  Get descriptors of fields appended to sorted fields and
  calculate their total length.

  The function first finds out what fields are used in the result set.
  Then it calculates the length of the buffer to store the values of
  these fields together with the sort key values.
  If the calculated length is not greater than max_length_for_sort_data,
  the function allocates memory for an array of descriptors containing
  layouts for the values of the non-sorted fields in the buffer and
  fills them.

  @param max_length_for_sort_data Value of session variable.
  @param ptabfield Array of references to the table fields
  @param sortlength Total length of sorted fields
  @param[out] addon_fields_status Reason for *not* using packed addon fields
  @param[out] plength Total length of appended fields
  @param[out] ppackable_length Total length of appended fields having a
    packable type

  @note
    The null bits for the appended values are supposed to be put together
    and stored into the buffer just ahead of the value of the first field.

  @return
    Pointer to the layout descriptors for the appended fields, if any
  @returns
    NULL if we do not store field values with sort data.
*/

Addon_fields *Filesort::get_addon_fields(
    ulong max_length_for_sort_data, Field **ptabfield, uint sortlength,
    Addon_fields_status *addon_fields_status, uint *plength,
    uint *ppackable_length) {
  Field **pfield;
  Field *field;
  uint total_length = 0;
  uint packable_length = 0;
  uint num_fields = 0;
  uint null_fields = 0;
  TABLE *const table = qep_tab->table();
  MY_BITMAP *read_set = table->read_set;

  // Locate the effective index for the table to be sorted (if any)
  const uint index = qep_tab->effective_index();
  /*
    filter_covering is true if access is via an index that is covering,
    regardless of whether the access is by the covering index or by
    index and base table, since the query has to be fulfilled with fields
    from that index only.
    This information is later used to filter out base columns for virtual
    generated columns, since these are only needed when reading the table.
    During sorting, trust that values for all generated columns have been
    materialized, which means that base columns are no longer necessary.
  */
  const bool filter_covering = index != MAX_KEY &&
                               table->covering_keys.is_set(index) &&
                               table->index_contains_some_virtual_gcol(index);

  /*
    If there is a reference to a field in the query, add it
    to the set of appended fields.
    Note for future refinement:
    This is too strong a condition.
    Actually we need only the fields referred to in the
    result set. And for some of them it makes sense to use
    the values directly from sorted fields.
  */
  *plength = *ppackable_length = 0;
  *addon_fields_status = Addon_fields_status::unknown_status;

  for (pfield = ptabfield; (field = *pfield); pfield++) {
    if (!bitmap_is_set(read_set, field->field_index)) continue;
    // part_of_key is empty for a BLOB, so apply this check before the next.
    if (field->flags & BLOB_FLAG) {
      DBUG_ASSERT(m_sort_param.addon_fields == NULL);
      *addon_fields_status = Addon_fields_status::row_contains_blob;
      return NULL;
    }
    if (filter_covering && !field->part_of_key.is_set(index))
      continue;  // See explanation above filter_covering

    const uint field_length = field->max_packed_col_length();
    total_length += field_length;

    const enum_field_types field_type = field->type();
    if (field->maybe_null() || field_type == MYSQL_TYPE_STRING ||
        field_type == MYSQL_TYPE_VARCHAR || field_type == MYSQL_TYPE_VAR_STRING)
      packable_length += field_length;
    if (field->maybe_null()) null_fields++;
    num_fields++;
  }
  if (0 == num_fields) return NULL;

  total_length += (null_fields + 7) / 8;

  *ppackable_length = packable_length;

  if (total_length + sortlength > max_length_for_sort_data) {
    DBUG_ASSERT(m_sort_param.addon_fields == NULL);
    *addon_fields_status = Addon_fields_status::max_length_for_sort_data;
    return NULL;
  }

  if (m_sort_param.addon_fields == NULL) {
    void *rawmem1 = (*THR_MALLOC)->Alloc(sizeof(Addon_fields));
    void *rawmem2 = (*THR_MALLOC)->Alloc(sizeof(Sort_addon_field) * num_fields);
    if (rawmem1 == NULL || rawmem2 == NULL)
      return NULL; /* purecov: inspected */
    Addon_fields_array addon_array(static_cast<Sort_addon_field *>(rawmem2),
                                   num_fields);
    m_sort_param.addon_fields = new (rawmem1) Addon_fields(addon_array);
  } else {
    /*
      Allocate memory only once, reuse descriptor array and buffer.
      Set using_packed_addons here, and size/offset details below.
    */
    DBUG_ASSERT(num_fields ==
                m_sort_param.addon_fields->num_field_descriptors());
    m_sort_param.addon_fields->set_using_packed_addons(false);
  }

  *plength = total_length;

  uint length = (null_fields + 7) / 8;
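  /*
    Illustrative layout (hypothetical: three appended fields, of which f1 and
    f3 are nullable):

      [null bits: 1 byte][f1 value][f2 value][f3 value]

    with bit 0 of the null byte belonging to f1 and bit 1 to f3. For packed
    addons, the total length of the block is additionally stored up front
    (see store_addon_length() in make_sortkey()).
  */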
  null_fields = 0;
  Addon_fields_array::iterator addonf = m_sort_param.addon_fields->begin();
  for (pfield = ptabfield; (field = *pfield); pfield++) {
    if (!bitmap_is_set(read_set, field->field_index)) continue;
    if (filter_covering && !field->part_of_key.is_set(index)) continue;
    DBUG_ASSERT(addonf != m_sort_param.addon_fields->end());

    addonf->field = field;
    addonf->offset = length;
    if (field->maybe_null()) {
      addonf->null_offset = null_fields / 8;
      addonf->null_bit = 1 << (null_fields & 7);
      null_fields++;
    } else {
      addonf->null_offset = 0;
      addonf->null_bit = 0;
    }
    addonf->max_length = field->max_packed_col_length();
    DBUG_PRINT("info", ("addon_field %s max_length %u",
                        addonf->field->field_name, addonf->max_length));

    length += addonf->max_length;
    addonf++;
  }

  DBUG_PRINT("info", ("addon_length: %d", length));
  *addon_fields_status = Addon_fields_status::using_addon_fields;
  return m_sort_param.addon_fields;
}

bool Filesort::using_addon_fields() {
  m_sort_param.decide_addon_fields(this, qep_tab->table(),
                                   m_thd->variables.max_length_for_sort_data,
                                   m_force_sort_positions);
  return m_sort_param.using_addon_fields();
}

/*
** functions to change a double or float to a sortable string
** The following should work for IEEE
*/

void change_double_for_sort(double nr, uchar *to) {
  /*
    -0.0 and +0.0 compare identically, so make sure they use exactly the same
    bit pattern.
  */
  if (nr == 0.0) nr = 0.0;

  /*
    Positive doubles sort exactly as ints; negative doubles need
    bit flipping. The bit flipping sets the upper bit to 0
    unconditionally, so put 1 in there for positive numbers
    (so they sort later for our unsigned comparison).
    NOTE: This does not sort infinities or NaN correctly.
  */
  int64 nr_int;
  memcpy(&nr_int, &nr, sizeof(nr));
  nr_int = (nr_int ^ (nr_int >> 63)) | ((~nr_int) & 0x8000000000000000ULL);
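
  /*
    Illustrative values (hand-computed for IEEE 754 binary64; not from the
    server):
      -2.0 (0xC000000000000000) maps to 0x3FFFFFFFFFFFFFFF
      -1.0 (0xBFF0000000000000) maps to 0x400FFFFFFFFFFFFF
       0.0 (0x0000000000000000) maps to 0x8000000000000000
       1.0 (0x3FF0000000000000) maps to 0xBFF0000000000000
    so the mapped values sort correctly as unsigned 64-bit integers.
  */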

  // TODO: Make store64be() or similar.
  memcpy(to, &nr_int, sizeof(nr_int));
#if !defined(WORDS_BIGENDIAN)
  using std::swap;
  swap(to[0], to[7]);
  swap(to[1], to[6]);
  swap(to[2], to[5]);
  swap(to[3], to[4]);
#endif
}