432 lines
14 KiB
C++
432 lines
14 KiB
C++
/* Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License, version 2.0,
|
|
as published by the Free Software Foundation.
|
|
|
|
This program is also distributed with certain software (including
|
|
but not limited to OpenSSL) that is licensed under separate terms,
|
|
as designated in a particular file or component or in included license
|
|
documentation. The authors of MySQL hereby grant you an additional
|
|
permission to link the program and your derivative works with the
|
|
separately licensed software that they have included with MySQL.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License, version 2.0, for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
#include "sql/filesort_utils.h"
|
|
|
|
#include <string.h>
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
|
|
#include "add_with_saturate.h"
|
|
#include "my_dbug.h"
|
|
#include "my_io.h"
|
|
#include "my_pointer_arithmetic.h"
|
|
#include "sql/cmp_varlen_keys.h"
|
|
#include "sql/opt_costmodel.h"
|
|
#include "sql/sort_param.h"
|
|
#include "sql/sql_sort.h"
|
|
#include "sql/thr_malloc.h"
|
|
|
|
PSI_memory_key key_memory_Filesort_buffer_sort_keys;
|
|
|
|
using std::max;
|
|
using std::min;
|
|
using std::sort;
|
|
using std::stable_sort;
|
|
using std::unique;
|
|
using std::vector;
|
|
|
|
namespace {
|
|
|
|
/*
|
|
An inline function which does memcmp().
|
|
This one turns out to be pretty fast on all platforms, except sparc.
|
|
See the accompanying unit tests, which measure various implementations.
|
|
*/
|
|
inline bool my_mem_compare(const uchar *s1, const uchar *s2, size_t len) {
|
|
DBUG_ASSERT(len > 0);
|
|
DBUG_ASSERT(s1 != NULL);
|
|
DBUG_ASSERT(s2 != NULL);
|
|
do {
|
|
if (*s1++ != *s2++) return *--s1 < *--s2;
|
|
} while (--len != 0);
|
|
return false;
|
|
}
|
|
|
|
#define COMPARE(N) \
|
|
if (s1[N] != s2[N]) return s1[N] < s2[N]
|
|
|
|
inline bool my_mem_compare_longkey(const uchar *s1, const uchar *s2,
|
|
size_t len) {
|
|
COMPARE(0);
|
|
COMPARE(1);
|
|
COMPARE(2);
|
|
COMPARE(3);
|
|
return memcmp(s1 + 4, s2 + 4, len - 4) < 0;
|
|
}
|
|
|
|
class Mem_compare {
|
|
public:
|
|
explicit Mem_compare(size_t n) : m_size(n) {}
|
|
bool operator()(const uchar *s1, const uchar *s2) const {
|
|
#ifdef __sun
|
|
// The native memcmp is faster on SUN.
|
|
return memcmp(s1, s2, m_size) < 0;
|
|
#else
|
|
return my_mem_compare(s1, s2, m_size);
|
|
#endif
|
|
}
|
|
|
|
private:
|
|
size_t m_size;
|
|
};
|
|
|
|
class Mem_compare_longkey {
|
|
public:
|
|
explicit Mem_compare_longkey(size_t n) : m_size(n) {}
|
|
bool operator()(const uchar *s1, const uchar *s2) const {
|
|
#ifdef __sun
|
|
// The native memcmp is faster on SUN.
|
|
return memcmp(s1, s2, m_size) < 0;
|
|
#else
|
|
return my_mem_compare_longkey(s1, s2, m_size);
|
|
#endif
|
|
}
|
|
|
|
private:
|
|
size_t m_size;
|
|
};
|
|
|
|
class Mem_compare_varlen_key {
|
|
public:
|
|
Mem_compare_varlen_key(const Bounds_checked_array<st_sort_field> sfa,
|
|
bool use_hash_arg)
|
|
: sort_field_array(sfa.array(), sfa.size()), use_hash(use_hash_arg) {}
|
|
|
|
bool operator()(const uchar *s1, const uchar *s2) const {
|
|
return cmp_varlen_keys(sort_field_array, use_hash, s1, s2);
|
|
}
|
|
|
|
private:
|
|
Bounds_checked_array<st_sort_field> sort_field_array;
|
|
bool use_hash;
|
|
};
|
|
|
|
template <class Comp>
|
|
class Equality_from_less {
|
|
public:
|
|
explicit Equality_from_less(const Comp &comp) : m_comp(comp) {}
|
|
|
|
template <class A, class B>
|
|
bool operator()(const A &a, const B &b) const {
|
|
return !(m_comp(a, b) || m_comp(b, a));
|
|
}
|
|
|
|
private:
|
|
const Comp &m_comp;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
unsigned Filesort_buffer::sort_buffer(Sort_param *param, uint count) {
|
|
const bool force_stable_sort = param->m_force_stable_sort;
|
|
param->m_sort_algorithm = Sort_param::FILESORT_ALG_NONE;
|
|
|
|
if (count <= 1) return count;
|
|
if (param->max_compare_length() == 0) return count;
|
|
|
|
const auto it_begin = begin(m_record_pointers);
|
|
const auto it_end = begin(m_record_pointers) + count;
|
|
|
|
if (param->using_varlen_keys()) {
|
|
const Mem_compare_varlen_key comp(param->local_sortorder, param->use_hash);
|
|
if (force_stable_sort) {
|
|
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_STABLE;
|
|
stable_sort(it_begin, it_end, comp);
|
|
} else {
|
|
// TODO: Make more elaborate heuristics than just always picking
|
|
// std::sort.
|
|
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
|
|
sort(it_begin, it_end, comp);
|
|
}
|
|
if (param->m_remove_duplicates) {
|
|
return unique(it_begin, it_end,
|
|
Equality_from_less<Mem_compare_varlen_key>(comp)) -
|
|
it_begin;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// If we don't use addon fields, we'll have the record position appended to
|
|
// the end of each record. This disturbs our equality comparisons, so we'll
|
|
// have to remove it. (Removing it also makes the comparisons ever so slightly
|
|
// cheaper.)
|
|
size_t key_len = param->max_compare_length();
|
|
if (!param->using_addon_fields()) {
|
|
key_len -= param->ref_length;
|
|
}
|
|
|
|
/*
|
|
std::stable_sort has some extra overhead in allocating the temp buffer,
|
|
which takes some time. The cutover point where it starts to get faster
|
|
than quicksort seems to be somewhere around 10 to 40 records.
|
|
So we're a bit conservative, and stay with quicksort up to 100 records.
|
|
*/
|
|
if (count <= 100 && !force_stable_sort) {
|
|
if (param->max_compare_length() < 10) {
|
|
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
|
|
sort(it_begin, it_end, Mem_compare(key_len));
|
|
if (param->m_remove_duplicates) {
|
|
return unique(it_begin, it_end,
|
|
Equality_from_less<Mem_compare>(Mem_compare(key_len))) -
|
|
it_begin;
|
|
}
|
|
return count;
|
|
}
|
|
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_SORT;
|
|
sort(it_begin, it_end, Mem_compare_longkey(param->max_compare_length()));
|
|
if (param->m_remove_duplicates) {
|
|
auto new_end = unique(it_begin, it_end,
|
|
Equality_from_less<Mem_compare_longkey>(
|
|
Mem_compare_longkey(key_len)));
|
|
return new_end - it_begin;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
param->m_sort_algorithm = Sort_param::FILESORT_ALG_STD_STABLE;
|
|
// Heuristics here: avoid function overhead call for short keys.
|
|
if (key_len < 10) {
|
|
stable_sort(it_begin, it_end, Mem_compare(key_len));
|
|
if (param->m_remove_duplicates) {
|
|
return unique(it_begin, it_end,
|
|
Equality_from_less<Mem_compare>(Mem_compare(key_len))) -
|
|
it_begin;
|
|
}
|
|
} else {
|
|
stable_sort(it_begin, it_end, Mem_compare_longkey(key_len));
|
|
if (param->m_remove_duplicates) {
|
|
return unique(it_begin, it_end,
|
|
Equality_from_less<Mem_compare_longkey>(
|
|
Mem_compare_longkey(key_len))) -
|
|
it_begin;
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
void Filesort_buffer::reset() {
|
|
update_peak_memory_used();
|
|
m_record_pointers.clear();
|
|
if (m_blocks.size() >= 2) {
|
|
// Free every block but the last.
|
|
m_blocks.erase(m_blocks.begin(), m_blocks.end() - 1);
|
|
}
|
|
|
|
/*
|
|
m_max_record_length can have changed since last time; if the remaining
|
|
(largest) block is not large enough for a single row of the next size,
|
|
then clear out that, too.
|
|
*/
|
|
if (m_max_record_length > m_current_block_size) {
|
|
free_sort_buffer();
|
|
}
|
|
|
|
if (m_blocks.empty()) {
|
|
DBUG_ASSERT(m_next_rec_ptr == nullptr);
|
|
DBUG_ASSERT(m_current_block_end == nullptr);
|
|
DBUG_ASSERT(m_current_block_size == 0);
|
|
} else {
|
|
m_next_rec_ptr = m_blocks[0].get();
|
|
DBUG_ASSERT(m_current_block_end == m_next_rec_ptr + m_current_block_size);
|
|
}
|
|
m_space_used_other_blocks = 0;
|
|
}
|
|
|
|
bool Filesort_buffer::preallocate_records(size_t num_records) {
|
|
if (m_max_record_length == 0xFFFFFFFFu) {
|
|
// The rest of the code uses this value for “infinite” and saturates to it,
|
|
// so even if we have a large sort buffer (> 4 GB), we we can't know for
|
|
// sure there's going to be room.
|
|
return true;
|
|
}
|
|
|
|
reset();
|
|
|
|
const size_t bytes_needed = num_records * m_max_record_length;
|
|
if (bytes_needed + num_records * sizeof(m_record_pointers[0]) >
|
|
m_max_size_in_bytes) {
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
If the remaining block can't hold what we need, then it's of no
|
|
use to us (it doesn't save us any allocations), so get rid of it
|
|
and allocate one that's exactly the right size.
|
|
*/
|
|
if (m_next_rec_ptr + bytes_needed > m_current_block_end) {
|
|
free_sort_buffer();
|
|
if (allocate_sized_block(bytes_needed)) {
|
|
return true;
|
|
}
|
|
m_record_pointers.reserve(num_records);
|
|
}
|
|
while (m_record_pointers.size() < num_records) {
|
|
Bounds_checked_array<uchar> ptr =
|
|
get_next_record_pointer(m_max_record_length);
|
|
(void)ptr;
|
|
DBUG_ASSERT(ptr.array() != nullptr);
|
|
commit_used_memory(m_max_record_length);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool Filesort_buffer::allocate_block(size_t num_bytes) {
|
|
DBUG_EXECUTE_IF("alloc_sort_buffer_fail",
|
|
DBUG_SET("+d,simulate_out_of_memory"););
|
|
|
|
size_t next_block_size;
|
|
if (m_current_block_size == 0) {
|
|
// First block.
|
|
next_block_size = MIN_SORT_MEMORY;
|
|
} else {
|
|
next_block_size = m_current_block_size + m_current_block_size / 2;
|
|
}
|
|
|
|
/*
|
|
If our last block isn't used at all, we can safely free it
|
|
before we try to allocate a larger one. Note that we do this
|
|
after the calculation above, which uses m_current_block_size.
|
|
*/
|
|
if (!m_blocks.empty() && m_blocks.back().get() == m_next_rec_ptr) {
|
|
m_current_block_size = 0;
|
|
m_next_rec_ptr = nullptr;
|
|
m_current_block_end = nullptr;
|
|
m_blocks.pop_back();
|
|
}
|
|
|
|
// Figure out how much space we've used, to see how much is left (if
|
|
// anything).
|
|
size_t space_used = m_current_block_size + m_space_used_other_blocks;
|
|
space_used += m_record_pointers.capacity() * sizeof(m_record_pointers[0]);
|
|
|
|
size_t space_left;
|
|
if (space_used > m_max_size_in_bytes)
|
|
space_left = 0;
|
|
else
|
|
space_left = m_max_size_in_bytes - space_used;
|
|
|
|
/*
|
|
Adjust space_left to take into account that filling this new buffer
|
|
with records would necessarily also add pointers to m_record_pointers.
|
|
Note that we know how much space record_pointers currently is using,
|
|
but not how much it could potentially be using in the future as we add
|
|
records; we take a best-case estimate based on maximum-size records.
|
|
It's also impossible to say how capacity() will change since this
|
|
is an implementation detail, so we don't take that into account.
|
|
This means that, for smaller records, we could go above the maximum
|
|
permitted total memory usage.
|
|
*/
|
|
size_t min_num_rows_capacity =
|
|
m_record_pointers.size() +
|
|
space_left /
|
|
AddWithSaturate(m_max_record_length, sizeof(m_record_pointers[0]));
|
|
if (min_num_rows_capacity > m_record_pointers.capacity()) {
|
|
space_left -= (min_num_rows_capacity - m_record_pointers.capacity()) *
|
|
sizeof(m_record_pointers[0]);
|
|
}
|
|
|
|
next_block_size = min(max(next_block_size, num_bytes), space_left);
|
|
if (next_block_size < num_bytes) {
|
|
/*
|
|
If we're really out of space, but have at least 32 kB unused in
|
|
m_record_pointers, try to reclaim some space and try again. This should
|
|
only be needed in some very rare cases where we first sort a lot of very
|
|
short rows (yielding a huge amount of record pointers) and then need to
|
|
sort huge rows that wouldn't fit in the buffer otherwise -- in other
|
|
words, nearly never.
|
|
*/
|
|
size_t excess_bytes =
|
|
(m_record_pointers.capacity() - m_record_pointers.size()) *
|
|
sizeof(m_record_pointers[0]);
|
|
if (excess_bytes >= 32768) {
|
|
size_t old_capacity = m_record_pointers.capacity();
|
|
m_record_pointers.shrink_to_fit();
|
|
if (m_record_pointers.capacity() < old_capacity) {
|
|
return allocate_block(num_bytes);
|
|
}
|
|
}
|
|
|
|
// We're full.
|
|
return true;
|
|
}
|
|
|
|
return allocate_sized_block(next_block_size);
|
|
}
|
|
|
|
bool Filesort_buffer::allocate_sized_block(size_t block_size) {
|
|
unique_ptr_my_free<uchar[]> new_block((uchar *)my_malloc(
|
|
key_memory_Filesort_buffer_sort_keys, block_size, MYF(0)));
|
|
if (new_block == nullptr) {
|
|
return true;
|
|
}
|
|
|
|
m_space_used_other_blocks += m_current_block_size;
|
|
m_current_block_size = block_size;
|
|
m_next_rec_ptr = new_block.get();
|
|
m_current_block_end = new_block.get() + m_current_block_size;
|
|
m_blocks.push_back(std::move(new_block));
|
|
|
|
return false;
|
|
}
|
|
|
|
void Filesort_buffer::free_sort_buffer() {
|
|
update_peak_memory_used();
|
|
|
|
// std::vector::clear() does not necessarily free all the memory,
|
|
// but moving or swapping in an empty vector typically does (and we
|
|
// rely on this, even though we really shouldn't). This shouldn't have
|
|
// been a problem since they will be cleared in the destructor, but
|
|
// there are _many_ places scattered around the code that construct TABLE
|
|
// objects (which indirectly contain Filesort_buffer objects) and never
|
|
// destroy them properly. (You can find lots of them easily by adding an
|
|
// std::unique_ptr<int> to Filesort_buffer and giving it a value in the
|
|
// constructor; it will leak all over the place.) We should fix that,
|
|
// but for the time being, we have this workaround instead.
|
|
m_record_pointers = vector<uchar *>();
|
|
m_blocks = vector<unique_ptr_my_free<uchar[]>>();
|
|
|
|
m_space_used_other_blocks = 0;
|
|
m_next_rec_ptr = nullptr;
|
|
m_current_block_end = nullptr;
|
|
m_current_block_size = 0;
|
|
}
|
|
|
|
Bounds_checked_array<uchar> Filesort_buffer::get_contiguous_buffer() {
|
|
if (m_current_block_size != m_max_size_in_bytes) {
|
|
free_sort_buffer();
|
|
|
|
if (allocate_sized_block(m_max_size_in_bytes)) {
|
|
return Bounds_checked_array<uchar>(nullptr, 0);
|
|
}
|
|
}
|
|
return Bounds_checked_array<uchar>(m_blocks.back().get(),
|
|
m_max_size_in_bytes);
|
|
}
|
|
|
|
void Filesort_buffer::update_peak_memory_used() const {
|
|
m_peak_memory_used =
|
|
max(m_peak_memory_used,
|
|
m_record_pointers.capacity() * sizeof(m_record_pointers[0]) +
|
|
m_current_block_size + m_space_used_other_blocks);
|
|
}
|