1277 lines
46 KiB
C++
1277 lines
46 KiB
C++
#ifndef SQL_COMPOSITE_ITERATORS_INCLUDED
|
||
#define SQL_COMPOSITE_ITERATORS_INCLUDED
|
||
|
||
/* Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
|
||
|
||
This program is free software; you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License, version 2.0,
|
||
as published by the Free Software Foundation.
|
||
|
||
This program is also distributed with certain software (including
|
||
but not limited to OpenSSL) that is licensed under separate terms,
|
||
as designated in a particular file or component or in included license
|
||
documentation. The authors of MySQL hereby grant you an additional
|
||
permission to link the program and your derivative works with the
|
||
separately licensed software that they have included with MySQL.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License, version 2.0, for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program; if not, write to the Free Software
|
||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
||
|
||
/**
|
||
@file composite_iterators.h
|
||
|
||
A composite row iterator is one that takes in one or more existing iterators
|
||
and processes their rows in some interesting way. They are usually not bound
|
||
to a single table or similar, but are the inner (non-leaf) nodes of the
|
||
iterator execution tree. They consistently own their source iterator, although
|
||
not its memory (since we never allocate row iterators on the heap--usually on
|
||
a MEM_ROOT>). This means that in the end, you'll end up with a single root
|
||
iterator which then owns everything else recursively.
|
||
|
||
SortingIterator is also a composite iterator, but is defined in its own file.
|
||
*/
|
||
|
||
#include <stdio.h>
|
||
#include <algorithm>
|
||
#include <memory>
|
||
#include <string>
|
||
#include <vector>
|
||
|
||
#include "my_alloc.h"
|
||
#include "my_base.h"
|
||
#include "my_dbug.h"
|
||
#include "my_table_map.h"
|
||
#include "prealloced_array.h"
|
||
#include "sql/item.h"
|
||
#include "sql/row_iterator.h"
|
||
#include "sql/table.h"
|
||
|
||
class FollowTailIterator;
|
||
template <class T>
|
||
class List;
|
||
class JOIN;
|
||
class SELECT_LEX;
|
||
class SJ_TMP_TABLE;
|
||
class THD;
|
||
class Temp_table_param;
|
||
class Window;
|
||
|
||
/**
|
||
An iterator that takes in a stream of rows and passes through only those that
|
||
meet some criteria (i.e., a condition evaluates to true). This is typically
|
||
used for WHERE/HAVING.
|
||
*/
|
||
class FilterIterator final : public RowIterator {
|
||
public:
|
||
FilterIterator(THD *thd, unique_ptr_destroy_only<RowIterator> source,
|
||
Item *condition)
|
||
: RowIterator(thd), m_source(move(source)), m_condition(condition) {}
|
||
|
||
bool Init() override { return m_source->Init(); }
|
||
|
||
int Read() override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override { m_source->UnlockRow(); }
|
||
|
||
std::vector<Child> children() const override;
|
||
|
||
std::vector<std::string> DebugString() const override {
|
||
return {"Filter: " + ItemToString(m_condition)};
|
||
}
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_source;
|
||
Item *m_condition;
|
||
};
|
||
|
||
/**
|
||
Handles LIMIT and/or OFFSET; Init() eats the first "offset" rows, and Read()
|
||
stops as soon as it's seen "limit" rows (including any skipped by offset).
|
||
*/
|
||
class LimitOffsetIterator final : public RowIterator {
|
||
public:
|
||
/**
|
||
@param thd Thread context
|
||
@param source Row source
|
||
@param limit Maximum number of rows to read, including the ones skipped by
|
||
offset. Can be HA_POS_ERROR for no limit.
|
||
@param offset Number of initial rows to skip. Can be 0 for no offset.
|
||
@param count_all_rows If true, the query will run to completion to get
|
||
more accurate numbers for skipped_rows, so you will not get any
|
||
performance benefits of early end.
|
||
@param skipped_rows If not nullptr, is incremented for each row skipped by
|
||
offset or limit.
|
||
*/
|
||
LimitOffsetIterator(THD *thd, unique_ptr_destroy_only<RowIterator> source,
|
||
ha_rows limit, ha_rows offset, bool count_all_rows,
|
||
ha_rows *skipped_rows)
|
||
: RowIterator(thd),
|
||
m_source(move(source)),
|
||
m_limit(limit),
|
||
m_offset(offset),
|
||
m_count_all_rows(count_all_rows),
|
||
m_skipped_rows(skipped_rows) {
|
||
if (count_all_rows) {
|
||
DBUG_ASSERT(m_skipped_rows != nullptr);
|
||
}
|
||
}
|
||
|
||
bool Init() override;
|
||
|
||
int Read() override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override { m_source->UnlockRow(); }
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source.get(), ""}};
|
||
}
|
||
|
||
std::vector<std::string> DebugString() const override {
|
||
char buf[256];
|
||
if (m_offset == 0) {
|
||
snprintf(buf, sizeof(buf), "Limit: %llu row(s)", m_limit);
|
||
} else if (m_limit == HA_POS_ERROR) {
|
||
snprintf(buf, sizeof(buf), "Offset: %llu row(s)", m_offset);
|
||
} else {
|
||
snprintf(buf, sizeof(buf), "Limit/Offset: %llu/%llu row(s)",
|
||
m_limit - m_offset, m_offset);
|
||
}
|
||
if (m_count_all_rows) {
|
||
return {std::string(buf) + " (no early end due to SQL_CALC_FOUND_ROWS)"};
|
||
} else {
|
||
return {std::string(buf)};
|
||
}
|
||
}
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_source;
|
||
|
||
// Note: The number of seen rows starts off at m_limit if we have OFFSET,
|
||
// which means we don't need separate LIMIT and OFFSET tests on the
|
||
// fast path of Read().
|
||
ha_rows m_seen_rows;
|
||
|
||
/**
|
||
Whether we have OFFSET rows that we still need to skip.
|
||
*/
|
||
bool m_needs_offset;
|
||
|
||
const ha_rows m_limit, m_offset;
|
||
const bool m_count_all_rows;
|
||
ha_rows *m_skipped_rows;
|
||
};
|
||
|
||
/**
|
||
Handles aggregation (typically used for GROUP BY) for the case where the rows
|
||
are already properly grouped coming in, ie., all rows that are supposed to be
|
||
part of the same group are adjacent in the input stream. (This could be
|
||
because they were sorted earlier, because we are scanning an index that
|
||
already gives us the rows in a group-compatible order, or because there is no
|
||
grouping.)
|
||
|
||
AggregateIterator is special in that it's one of the very few row iterators
|
||
that actually change the shape of the rows; some columns are dropped as part
|
||
of aggregation, others (the aggregates) are added. For this reason (and also
|
||
because we need to make copies of the group expressions -- see Read()), it
|
||
conceptually always outputs to a temporary table. If we _are_ outputting to a
|
||
temporary table, that's not a problem -- we take over responsibility for
|
||
copying the group expressions from MaterializeIterator, which would otherwise
|
||
do it.
|
||
|
||
However, if we are outputting directly to the user, we need somewhere to store
|
||
the output. This is solved by abusing the slice system; since we only need to
|
||
buffer a single row, we can set up just enough items in the
|
||
REF_SLICE_ORDERED_GROUP_BY slice, so that it can hold a single row. This row
|
||
is then used for our output, and we then switch to it just before the end of
|
||
Read() so that anyone reading from the buffers will get that output.
|
||
The caller knows the context about where our output goes, and thus also picks
|
||
the appropriate output slice for us.
|
||
|
||
This isn't very pretty. What should be done is probably a more abstract
|
||
concept of sending a row around and taking copies of it if needed, as opposed
|
||
to it implicitly staying in the table's buffer. (This would also solve some
|
||
issues in EQRefIterator and when synthesizing NULL rows for outer joins.)
|
||
However, that's a large refactoring.
|
||
*/
|
||
class AggregateIterator final : public RowIterator {
|
||
public:
|
||
AggregateIterator(THD *thd, unique_ptr_destroy_only<RowIterator> source,
|
||
JOIN *join, Temp_table_param *temp_table_param,
|
||
int output_slice, bool rollup);
|
||
|
||
bool Init() override;
|
||
int Read() override;
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override {
|
||
// Most likely, HAVING failed. Ideally, we'd like to backtrack and
|
||
// unlock all rows that went into this aggregate, but we can't do that,
|
||
// and we also can't unlock the _current_ row, since that belongs to a
|
||
// different group. Thus, do nothing.
|
||
}
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source.get(), ""}};
|
||
}
|
||
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
private:
|
||
enum {
|
||
READING_FIRST_ROW,
|
||
LAST_ROW_STARTED_NEW_GROUP,
|
||
READING_ROWS,
|
||
OUTPUTTING_ROLLUP_ROWS,
|
||
DONE_OUTPUTTING_ROWS
|
||
} m_state;
|
||
|
||
unique_ptr_destroy_only<RowIterator> m_source;
|
||
|
||
/**
|
||
The join we are part of. It would be nicer not to rely on this,
|
||
but we need a large number of members from there, like which
|
||
aggregate functions we have, the THD, temporary table parameters
|
||
and so on.
|
||
*/
|
||
JOIN *m_join = nullptr;
|
||
|
||
/// The slice of the fields we are reading from (see the class comment).
|
||
int m_input_slice;
|
||
|
||
/// The slice of the fields we are outputting to. See the class comment.
|
||
int m_output_slice;
|
||
|
||
/// Whether we have seen the last input row.
|
||
bool m_seen_eof;
|
||
|
||
/**
|
||
Used to save NULL information in the specific case where we have
|
||
zero input rows.
|
||
*/
|
||
table_map m_save_nullinfo;
|
||
|
||
/// The parameters for the temporary table we are materializing into, if any.
|
||
Temp_table_param *m_temp_table_param;
|
||
|
||
/// Whether this is a rollup query.
|
||
const bool m_rollup;
|
||
|
||
/**
|
||
Whether we have a rollup query where we needed to replace m_join->fields
|
||
with a set of Item_refs. See the constructor for more information.
|
||
*/
|
||
bool m_replace_field_list;
|
||
|
||
/// If we have replaced the field list, contains the original field list.
|
||
List<Item> *m_original_fields = nullptr;
|
||
|
||
/**
|
||
If we have replaced the field list, this is the list of Item pointers
|
||
that each Item_ref points into.
|
||
*/
|
||
Mem_root_array<Item *> *m_current_fields = nullptr;
|
||
|
||
/**
|
||
The list that the current values in m_current_fields come from.
|
||
This is used purely as an optimization so that SwitchFieldList()
|
||
does not have to do copying work if m_current_fields is already set up
|
||
correctly. Only used if m_replace_field_list is true.
|
||
*/
|
||
const List<Item> *m_current_fields_source = nullptr;
|
||
|
||
/**
|
||
For rollup: The index of the first group item that did _not_ change when we
|
||
last switched groups. E.g., if we have group fields A,B,C,D and then switch
|
||
to group A,B,D,D, this value will become 1 (which means that we need
|
||
to output rollup rows for 2 -- A,B,D,NULL -- and then 1 -- A,B,NULL,NULL).
|
||
m_current_rollup_position will count down from the end until it becomes
|
||
less than this value.
|
||
|
||
In addition, it is important to know this value so that we known
|
||
which aggregates to reset once we start reading rows again; e.g.,
|
||
in the given example, the two first aggregates should keep counting,
|
||
while the two last ones should be reset. join->sum_funcs_end contains
|
||
the right end pointers for this purpose.
|
||
|
||
If we do not have rollup, this value is perennially zero, because there
|
||
is only one element in join->sum_funcs_end (representing all aggregates
|
||
in the query).
|
||
*/
|
||
int m_last_unchanged_group_item_idx;
|
||
|
||
/**
|
||
If we are in state OUTPUTTING_ROLLUP_ROWS, where we are in the iteration.
|
||
This value will start at the index of the last group expression and then
|
||
count backwards down to and including m_last_unchanged_group_item_idx.
|
||
It is used to know which field list we should send.
|
||
*/
|
||
int m_current_rollup_position;
|
||
|
||
void SwitchFieldList(List<Item> *fields) {
|
||
if (!m_replace_field_list || m_current_fields_source == fields) {
|
||
return;
|
||
}
|
||
|
||
size_t item_index = 0;
|
||
for (Item &item : *fields) {
|
||
(*m_current_fields)[item_index++] = &item;
|
||
}
|
||
m_current_fields_source = fields;
|
||
}
|
||
};
|
||
|
||
/**
|
||
Similar to AggregateIterator, but asusmes that the actual aggregates are
|
||
already have been filled out (typically by QUICK_RANGE_MIN_MAX), and all the
|
||
iterator needs to do is copy over the non-aggregated fields.
|
||
*/
|
||
class PrecomputedAggregateIterator final : public RowIterator {
|
||
public:
|
||
PrecomputedAggregateIterator(THD *thd,
|
||
unique_ptr_destroy_only<RowIterator> source,
|
||
JOIN *join, Temp_table_param *temp_table_param,
|
||
int output_slice)
|
||
: RowIterator(thd),
|
||
m_source(move(source)),
|
||
m_join(join),
|
||
m_temp_table_param(temp_table_param),
|
||
m_output_slice(output_slice) {}
|
||
|
||
bool Init() override;
|
||
int Read() override;
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override {
|
||
// See AggregateIterator::UnlockRow().
|
||
}
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source.get(), ""}};
|
||
}
|
||
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_source;
|
||
|
||
/**
|
||
The join we are part of. It would be nicer not to rely on this,
|
||
but we need a large number of members from there, like which
|
||
aggregate functions we have, the THD, temporary table parameters
|
||
and so on.
|
||
*/
|
||
JOIN *m_join = nullptr;
|
||
|
||
/// The parameters for the temporary table we are materializing into, if any.
|
||
Temp_table_param *m_temp_table_param;
|
||
|
||
/// The slice of the fields we are outputting to.
|
||
int m_output_slice;
|
||
};
|
||
|
||
enum class JoinType { INNER, OUTER, ANTI, SEMI };
|
||
|
||
/**
|
||
A simple nested loop join, taking in two iterators (left/outer and
|
||
right/inner) and joining them together. This may, of course, scan the inner
|
||
iterator many times. It is currently the only form of join we have.
|
||
|
||
The iterator works as a state machine, where the state records whether we need
|
||
to read a new outer row or not, and whether we've seen any rows from the inner
|
||
iterator at all (if not, an outer join need to synthesize a new NULL row).
|
||
|
||
The iterator takes care of activating performance schema batch mode on the
|
||
right iterator if needed; this is typically only used if it is the innermost
|
||
table in the entire join (where the gains from turning on batch mode is the
|
||
largest, and the accuracy loss from turning it off are the least critical).
|
||
*/
|
||
class NestedLoopIterator final : public RowIterator {
|
||
public:
|
||
NestedLoopIterator(THD *thd,
|
||
unique_ptr_destroy_only<RowIterator> source_outer,
|
||
unique_ptr_destroy_only<RowIterator> source_inner,
|
||
JoinType join_type, bool pfs_batch_mode)
|
||
: RowIterator(thd),
|
||
m_source_outer(move(source_outer)),
|
||
m_source_inner(move(source_inner)),
|
||
m_join_type(join_type),
|
||
m_pfs_batch_mode(pfs_batch_mode) {
|
||
DBUG_ASSERT(m_source_outer != nullptr);
|
||
DBUG_ASSERT(m_source_inner != nullptr);
|
||
|
||
// Batch mode makes no sense for anti- or semijoins, since they should only
|
||
// be reading one row.
|
||
if (join_type == JoinType::ANTI || join_type == JoinType::SEMI) {
|
||
DBUG_ASSERT(!pfs_batch_mode);
|
||
}
|
||
}
|
||
|
||
bool Init() override;
|
||
|
||
int Read() override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
// TODO: write something here about why we can't do this lazily.
|
||
m_source_outer->SetNullRowFlag(is_null_row);
|
||
m_source_inner->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source_outer->EndPSIBatchModeIfStarted();
|
||
m_source_inner->EndPSIBatchModeIfStarted();
|
||
}
|
||
|
||
void UnlockRow() override {
|
||
// Since we don't know which condition that caused the row to be rejected,
|
||
// we can't know whether we could also unlock the outer row
|
||
// (it may still be used as parts of other joined rows).
|
||
if (m_state == READING_FIRST_INNER_ROW || m_state == READING_INNER_ROWS) {
|
||
m_source_inner->UnlockRow();
|
||
}
|
||
}
|
||
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source_outer.get(), ""},
|
||
{m_source_inner.get(), ""}};
|
||
}
|
||
|
||
private:
|
||
enum {
|
||
NEEDS_OUTER_ROW,
|
||
READING_FIRST_INNER_ROW,
|
||
READING_INNER_ROWS,
|
||
END_OF_ROWS
|
||
} m_state;
|
||
|
||
unique_ptr_destroy_only<RowIterator> const m_source_outer;
|
||
unique_ptr_destroy_only<RowIterator> const m_source_inner;
|
||
const JoinType m_join_type;
|
||
|
||
/** Whether to use batch mode when scanning the inner iterator. */
|
||
const bool m_pfs_batch_mode;
|
||
};
|
||
|
||
/**
|
||
An iterator that helps invalidating caches. Every time a row passes through it
|
||
or it changes state in any other way, it increments its “generation” counter.
|
||
This allows MaterializeIterator to see whether any of its dependencies has
|
||
changed, and then force a rematerialization -- this is typically used for
|
||
LATERAL tables, where we're joining in a derived table that depends on
|
||
something earlier in the join.
|
||
*/
|
||
class CacheInvalidatorIterator final : public RowIterator {
|
||
public:
|
||
CacheInvalidatorIterator(THD *thd,
|
||
unique_ptr_destroy_only<RowIterator> source_iterator,
|
||
const std::string &name)
|
||
: RowIterator(thd),
|
||
m_source_iterator(move(source_iterator)),
|
||
m_name(name) {}
|
||
|
||
bool Init() override {
|
||
++m_generation;
|
||
return m_source_iterator->Init();
|
||
}
|
||
|
||
int Read() override {
|
||
++m_generation;
|
||
return m_source_iterator->Read();
|
||
}
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
++m_generation;
|
||
m_source_iterator->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void UnlockRow() override { m_source_iterator->UnlockRow(); }
|
||
std::vector<std::string> DebugString() const override;
|
||
std::vector<Child> children() const override {
|
||
return {Child{m_source_iterator.get(), ""}};
|
||
}
|
||
|
||
int64_t generation() const { return m_generation; }
|
||
std::string name() const { return m_name; }
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_source_iterator;
|
||
int64_t m_generation = 0;
|
||
std::string m_name;
|
||
};
|
||
|
||
/**
|
||
Handles materialization; the first call to Init() will scan the given iterator
|
||
to the end, store the results in a temporary table (optionally with
|
||
deduplication), and then Read() will allow you to read that table repeatedly
|
||
without the cost of executing the given subquery many times (unless you ask
|
||
for rematerialization).
|
||
|
||
When materializing, MaterializeIterator takes care of evaluating any items
|
||
that need so, and storing the results in the fields of the outgoing table --
|
||
which items is governed by the temporary table parameters.
|
||
|
||
Conceptually (although not performance-wise!), the MaterializeIterator is a
|
||
no-op if you don't ask for deduplication, and in some cases (e.g. when
|
||
scanning a table only once), we elide it. However, it's not necessarily
|
||
straightforward to do so by just not inserting the iterator, as the optimizer
|
||
will have set up everything (e.g., read sets, or what table upstream items
|
||
will read from) assuming the materialization will happen, so the realistic
|
||
option is setting up everything as if materialization would happen but not
|
||
actually write to the table; see StreamingIterator for details.
|
||
|
||
MaterializeIterator conceptually materializes iterators, not JOINs or
|
||
SELECT_LEX_UNITs. However, there are many details that leak out
|
||
(e.g., setting performance schema batch mode, slices, reusing CTEs,
|
||
etc.), so we need to send them in anyway.
|
||
*/
|
||
class MaterializeIterator final : public TableRowIterator {
|
||
public:
|
||
struct QueryBlock {
|
||
/// The iterator to read the actual rows from.
|
||
unique_ptr_destroy_only<RowIterator> subquery_iterator;
|
||
|
||
/// Used only for optimizer trace.
|
||
int select_number;
|
||
|
||
/// The JOIN that this query block represents. Used for performance
|
||
/// schema batch mode: When materializing a query block that consists of
|
||
/// a single table, MaterializeIterator needs to set up schema batch mode,
|
||
/// since there is no nested loop iterator to do it. (This is similar to
|
||
/// what ExecuteIteratorQuery() needs to do at the top level.)
|
||
JOIN *join;
|
||
|
||
/// If true, unique constraint checking via hash key is disabled
|
||
/// when materializing this query block (ie., we simply avoid calling
|
||
/// check_unique_constraint() for each row). Used when materializing
|
||
/// UNION DISTINCT and UNION ALL parts into the same table.
|
||
/// We'd like to just use a unique constraint via unique index instead,
|
||
/// but there might be other indexes on the destination table
|
||
/// that we'd like to keep, and the implementation doesn't allow
|
||
/// disabling only one index.
|
||
///
|
||
/// If you use this on a query block, doing_hash_deduplication()
|
||
/// must be true.
|
||
bool disable_deduplication_by_hash_field = false;
|
||
|
||
/// If set to false, the Field objects in the output row are
|
||
/// presumed already to be filled out. This is the case iff
|
||
/// there's an AggregateIterator earlier in the chain.
|
||
bool copy_fields_and_items;
|
||
|
||
/// If copy_fields_and_items is true, used for copying the Field objects
|
||
/// into the temporary table row. Otherwise unused.
|
||
Temp_table_param *temp_table_param;
|
||
|
||
// Whether this query block is a recursive reference back to the
|
||
// output of the materialization.
|
||
bool is_recursive_reference = false;
|
||
|
||
// If is_recursive_reference is true, contains the FollowTailIterator
|
||
// in the query block (there can be at most one recursive reference
|
||
// in a join list, as per the SQL standard, so there should be exactly one).
|
||
// Used for informing the iterators about various shared state in the
|
||
// materialization (including coordinating rematerializations).
|
||
FollowTailIterator *recursive_reader = nullptr;
|
||
};
|
||
|
||
/**
|
||
@param thd Thread handler.
|
||
@param query_blocks_to_materialize List of query blocks to materialize.
|
||
@param table Handle to table to materialize into.
|
||
@param table_iterator Iterator used for scanning the temporary table
|
||
after materialization.
|
||
@param cte If materializing a CTE, points to it (see m_cte), otherwise
|
||
nullptr.
|
||
@param unit The query expression we are materializing (see m_unit).
|
||
@param join
|
||
When materializing within the same JOIN (e.g., into a temporary table
|
||
before sorting), as opposed to a derived table or a CTE, we may need
|
||
to change the slice on the join before returning rows from the result
|
||
table. If so, join and ref_slice would need to be set, and
|
||
query_blocks_to_materialize should contain only one member, with the same
|
||
join.
|
||
@param ref_slice See join. If we are materializing across JOINs,
|
||
e.g. derived tables, ref_slice should be left at -1.
|
||
@param rematerialize true if rematerializing on every Init() call
|
||
(e.g., because we have a dependency on a value from outside the query
|
||
block).
|
||
@param limit_rows
|
||
Does the same job as a LimitOffsetIterator right before the
|
||
MaterializeIterator would have done, except that it works _after_
|
||
deduplication (if that is active). It is used for when pushing LIMIT down
|
||
to MaterializeIterator, so that we can stop materializing when there are
|
||
enough rows. The deduplication is the reason why this specific limit has
|
||
to be handled in MaterializeIterator and not using a regular
|
||
LimitOffsetIterator. Set to HA_POS_ERROR for no limit.
|
||
*/
|
||
MaterializeIterator(THD *thd,
|
||
Mem_root_array<QueryBlock> query_blocks_to_materialize,
|
||
TABLE *table,
|
||
unique_ptr_destroy_only<RowIterator> table_iterator,
|
||
const Common_table_expr *cte, SELECT_LEX_UNIT *unit,
|
||
JOIN *join, int ref_slice, bool rematerialize,
|
||
ha_rows limit_rows);
|
||
|
||
/**
|
||
A convenience form for materializing a single table only.
|
||
|
||
@param thd Thread handler.
|
||
@param subquery_iterator The iterator to read the actual rows from.
|
||
@param temp_table_param If copy_fields_and_items is true, used for copying
|
||
the Field objects into the temporary table row. Otherwise unused.
|
||
@param table Handle to table to materialize into.
|
||
@param table_iterator Iterator used for scanning the temporary table
|
||
after materialization.
|
||
@param cte If materializing a CTE, points to it (see m_cte), otherwise
|
||
nullptr.
|
||
@param select_number Used only for optimizer trace.
|
||
@param unit The query expression we are materializing (see m_unit).
|
||
@param join
|
||
When materializing within the same JOIN (e.g., into a temporary table
|
||
before sorting), as opposed to a derived table or a CTE, we may need
|
||
to change the slice on the join before returning rows from the result
|
||
table. If so, join and ref_slice would need to be set, and
|
||
query_blocks_to_materialize should contain only one member, with the same
|
||
join.
|
||
@param ref_slice See join. If we are materializing across JOINs,
|
||
e.g. derived tables, ref_slice should be left at -1.
|
||
@param copy_fields_and_items If set to false, the Field objects in the
|
||
output row are presumed already to be filled out. This is the case iff
|
||
there's an AggregateIterator earlier in the chain.
|
||
@param rematerialize true if rematerializing on every Init() call
|
||
(e.g., because we have a dependency on a value from outside the query
|
||
block).
|
||
@param limit_rows See limit_rows on the other constructor.
|
||
*/
|
||
MaterializeIterator(THD *thd,
|
||
unique_ptr_destroy_only<RowIterator> subquery_iterator,
|
||
Temp_table_param *temp_table_param, TABLE *table,
|
||
unique_ptr_destroy_only<RowIterator> table_iterator,
|
||
const Common_table_expr *cte, int select_number,
|
||
SELECT_LEX_UNIT *unit, JOIN *join, int ref_slice,
|
||
bool copy_fields_and_items, bool rematerialize,
|
||
ha_rows limit_rows);
|
||
|
||
bool Init() override;
|
||
int Read() override;
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_table_iterator->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_table_iterator->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override;
|
||
|
||
// The temporary table is private to us, so there's no need to worry about
|
||
// locks to other transactions.
|
||
void UnlockRow() override {}
|
||
|
||
/**
|
||
Add a cache invalidator that must be checked on every Init().
|
||
If its generation has increased since last materialize, we need to
|
||
rematerialize even if m_rematerialize is false.
|
||
*/
|
||
void AddInvalidator(const CacheInvalidatorIterator *invalidator);
|
||
|
||
private:
|
||
Mem_root_array<QueryBlock> m_query_blocks_to_materialize;
|
||
unique_ptr_destroy_only<RowIterator> m_table_iterator;
|
||
|
||
/// If we are materializing a CTE, points to it (otherwise nullptr).
|
||
/// Used so that we see if some other iterator already materialized the table,
|
||
/// avoiding duplicate work.
|
||
const Common_table_expr *m_cte;
|
||
|
||
/// The query expression we are materializing. For derived tables,
|
||
/// we materialize the entire query expression; for materialization within
|
||
/// a query expression (e.g. for sorting or for windowing functions),
|
||
/// we materialize only parts of it. Used to clear correlated CTEs within
|
||
/// the unit when we rematerialize, since they depend on values from
|
||
/// outside the query expression, and those values may have changed
|
||
/// since last materialization.
|
||
SELECT_LEX_UNIT *m_unit;
|
||
|
||
/// See constructor.
|
||
JOIN *const m_join;
|
||
|
||
/// The slice to set when accessing temporary table; used if anything upstream
|
||
/// (e.g. WHERE, HAVING) wants to evaluate values based on its contents.
|
||
/// See constructor.
|
||
const int m_ref_slice;
|
||
|
||
/// If true, we need to materialize anew for each Init() (because the contents
|
||
/// of the table will depend on some outer non-constant value).
|
||
const bool m_rematerialize;
|
||
|
||
/// See constructor.
|
||
const ha_rows m_limit_rows;
|
||
|
||
struct Invalidator {
|
||
const CacheInvalidatorIterator *iterator;
|
||
int64_t generation_at_last_materialize;
|
||
};
|
||
Mem_root_array<Invalidator> m_invalidators;
|
||
|
||
/// Whether we are deduplicating using a hash field on the temporary
|
||
/// table. (This condition mirrors check_unique_constraint().)
|
||
/// If so, we compute a hash value for every row, look up all rows with
|
||
/// the same hash and manually compare them to the row we are trying to
|
||
/// insert.
|
||
///
|
||
/// Note that this is _not_ the common way of deduplicating as we go.
|
||
/// The common method is to have a regular index on the table
|
||
/// over the right columns, and in that case, ha_write_row() will fail
|
||
/// with an ignorable error, so that the row is ignored even though
|
||
/// check_unique_constraint() is not called. However, B-tree indexes
|
||
/// have limitations, in particular on length, that sometimes require us
|
||
/// to do this instead. See create_tmp_table() for details.
|
||
bool doing_hash_deduplication() const { return table()->hash_field; }
|
||
|
||
/// Whether we are deduplicating, whether through a hash field
|
||
/// or a regular unique index.
|
||
bool doing_deduplication() const;
|
||
|
||
bool MaterializeRecursive();
|
||
bool MaterializeQueryBlock(const QueryBlock &query_block,
|
||
ha_rows *stored_rows);
|
||
};
|
||
|
||
/**
|
||
StreamingIterator is a minimal version of MaterializeIterator that does not
|
||
actually materialize; instead, every Read() just forwards the call to the
|
||
subquery iterator and does the required copying from one set of fields to
|
||
another.
|
||
|
||
It is used for when the optimizer would normally set up a materialization,
|
||
but you don't actually need one, ie. you don't want to read the rows multiple
|
||
times after writing them, and you don't want to access them by index (only
|
||
a single table scan). If you don't need the copy functionality (ie., you
|
||
have an AggregateIterator, which does this job already), you still need a
|
||
StreamingIterator, to set the NULL row flag on the temporary table.
|
||
*/
|
||
class StreamingIterator final : public TableRowIterator {
|
||
public:
|
||
StreamingIterator(THD *thd,
|
||
unique_ptr_destroy_only<RowIterator> subquery_iterator,
|
||
Temp_table_param *temp_table_param, TABLE *table,
|
||
bool copy_fields_and_items);
|
||
|
||
bool Init() override;
|
||
|
||
int Read() override;
|
||
|
||
std::vector<std::string> DebugString() const override {
|
||
return {"Stream results"};
|
||
}
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_subquery_iterator.get(), ""}};
|
||
}
|
||
|
||
void StartPSIBatchMode() override {
|
||
m_subquery_iterator->StartPSIBatchMode();
|
||
}
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_subquery_iterator->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override { m_subquery_iterator->UnlockRow(); }
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_subquery_iterator;
|
||
Temp_table_param *m_temp_table_param;
|
||
const bool m_copy_fields_and_items;
|
||
ha_rows m_row_number;
|
||
|
||
// Whether the iterator should generate and provide a row ID. Only true if the
|
||
// iterator is part of weedout, where the iterator will create a fake row ID
|
||
// to uniquely identify the rows it produces.
|
||
bool m_provide_rowid{false};
|
||
};
|
||
|
||
/**
|
||
Aggregates unsorted data into a temporary table, using update operations
|
||
to keep running aggregates. After that, works as a MaterializeIterator
|
||
in that it allows the temporary table to be scanned.
|
||
*/
|
||
class TemptableAggregateIterator final : public TableRowIterator {
|
||
public:
|
||
TemptableAggregateIterator(
|
||
THD *thd, unique_ptr_destroy_only<RowIterator> subquery_iterator,
|
||
Temp_table_param *temp_table_param, TABLE *table,
|
||
unique_ptr_destroy_only<RowIterator> table_iterator,
|
||
SELECT_LEX *select_lex, JOIN *join, int ref_slice);
|
||
|
||
bool Init() override;
|
||
int Read() override;
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_table_iterator->SetNullRowFlag(is_null_row);
|
||
}
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_table_iterator->EndPSIBatchModeIfStarted();
|
||
m_subquery_iterator->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override {}
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override;
|
||
|
||
private:
|
||
/// The iterator we are reading rows from.
|
||
unique_ptr_destroy_only<RowIterator> m_subquery_iterator;
|
||
|
||
/// The iterator used to scan the resulting temporary table.
|
||
unique_ptr_destroy_only<RowIterator> m_table_iterator;
|
||
|
||
Temp_table_param *m_temp_table_param;
|
||
SELECT_LEX *m_select_lex;
|
||
JOIN *const m_join;
|
||
const int m_ref_slice;
|
||
|
||
// See MaterializeIterator::doing_hash_deduplication().
|
||
bool using_hash_key() const { return table()->hash_field; }
|
||
};
|
||
|
||
/**
|
||
An iterator that wraps a Table_function (e.g. JSON_TABLE) and allows you to
|
||
iterate over the materialized temporary table. The table is materialized anew
|
||
for every Init().
|
||
|
||
TODO: Just wrapping it is probably not the optimal thing to do;
|
||
Table_function is highly oriented around materialization, but never caches.
|
||
Thus, perhaps we should rewrite Table_function to return a RowIterator
|
||
instead of going through a temporary table.
|
||
*/
|
||
class MaterializedTableFunctionIterator final : public TableRowIterator {
|
||
public:
|
||
MaterializedTableFunctionIterator(
|
||
THD *thd, Table_function *table_function, TABLE *table,
|
||
unique_ptr_destroy_only<RowIterator> table_iterator);
|
||
|
||
bool Init() override;
|
||
int Read() override { return m_table_iterator->Read(); }
|
||
std::vector<std::string> DebugString() const override {
|
||
return {{"Materialize table function"}};
|
||
}
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_table_iterator->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_table_iterator->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_table_iterator->EndPSIBatchModeIfStarted();
|
||
}
|
||
|
||
// The temporary table is private to us, so there's no need to worry about
|
||
// locks to other transactions.
|
||
void UnlockRow() override {}
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_table_iterator;
|
||
|
||
Table_function *m_table_function;
|
||
};
|
||
|
||
/**
|
||
Like semijoin materialization, weedout works on the basic idea that a semijoin
|
||
is just like an inner join as we long as we can get rid of the duplicates
|
||
somehow. (This is advantageous, because inner joins can be reordered, whereas
|
||
semijoins generally can't.) However, unlike semijoin materialization, weedout
|
||
removes duplicates after the join, not before it. Consider something like
|
||
|
||
SELECT * FROM t1 WHERE a IN ( SELECT b FROM t2 );
|
||
|
||
Semijoin materialization solves this by materializing t2, with deduplication,
|
||
and then joining. Weedout joins t1 to t2 and then leaves only one output row
|
||
per t1 row. The disadvantage is that this potentially needs to discard more
|
||
rows; the (potential) advantage is that we deduplicate on t1 instead of t2.
|
||
|
||
Weedout, unlike materialization, works in a streaming fashion; rows are output
|
||
(or discarded) as they come in, with a temporary table used for recording the
|
||
row IDs we've seen before. (We need to deduplicate on t1's row IDs, not its
|
||
contents.) See SJ_TMP_TABLE for details about the table format.
|
||
*/
|
||
class WeedoutIterator final : public RowIterator {
|
||
public:
|
||
WeedoutIterator(THD *thd, unique_ptr_destroy_only<RowIterator> source,
|
||
SJ_TMP_TABLE *sj);
|
||
|
||
bool Init() override;
|
||
int Read() override;
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source.get(), ""}};
|
||
}
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override { m_source->UnlockRow(); }
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_source;
|
||
SJ_TMP_TABLE *m_sj;
|
||
|
||
// The cached value of QEP_TAB::rowid_status for each of the tables in the
|
||
// weedout. Index 0 corresponds to the first table in m_sj.
|
||
// See QEP_TAB::rowid_status for why we need to cache this value.
|
||
Prealloced_array<rowid_statuses, 4> m_rowid_status;
|
||
};
|
||
|
||
/**
|
||
An iterator that removes consecutive rows that are the same according to
|
||
a given index (or more accurately, its keypart), so-called “loose scan”
|
||
(not to be confused with “loose index scan”, which is a QUICK_SELECT_I).
|
||
This is similar in spirit to WeedoutIterator above (removing duplicates
|
||
allows us to treat the semijoin as a normal join), but is much cheaper
|
||
if the data is already ordered/grouped correctly, as the removal can
|
||
happen before the join, and it does not need a temporary table.
|
||
*/
|
||
class RemoveDuplicatesIterator final : public RowIterator {
|
||
public:
|
||
RemoveDuplicatesIterator(THD *thd,
|
||
unique_ptr_destroy_only<RowIterator> source,
|
||
const TABLE *table, KEY *key, size_t key_len);
|
||
|
||
bool Init() override;
|
||
int Read() override;
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source.get(), ""}};
|
||
}
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
void UnlockRow() override { m_source->UnlockRow(); }
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> m_source;
|
||
const TABLE *m_table;
|
||
KEY *m_key;
|
||
uchar *m_key_buf; // Owned by the THD's MEM_ROOT.
|
||
const size_t m_key_len;
|
||
bool m_first_row;
|
||
};
|
||
|
||
/**
|
||
An iterator that is semantically equivalent to a semijoin NestedLoopIterator
|
||
immediately followed by a RemoveDuplicatesIterator. It is used to implement
|
||
the “loose scan” strategy in queries with multiple tables on the inside of a
|
||
semijoin, like
|
||
|
||
... FROM t1 WHERE ... IN ( SELECT ... FROM t2 JOIN t3 ... )
|
||
|
||
In this case, the query tree without this iterator would ostensibly look like
|
||
|
||
-> Table scan on t1
|
||
-> Remove duplicates on t2_idx
|
||
-> Nested loop semijoin
|
||
-> Index scan on t2 using t2_idx
|
||
-> Filter (e.g. t3.a = t2.a)
|
||
-> Table scan on t3
|
||
|
||
(t3 will be marked as “first match” on t2 when implementing loose scan,
|
||
thus the semijoin.)
|
||
|
||
First note that we can't put the duplicate removal directly on t2 in this
|
||
case, as the first t2 row doesn't necessarily match anything in t3, so it
|
||
needs to be above. However, this is wasteful, because once we find a matching
|
||
t2/t3 pair, we should stop scanning t3 until we have a new t2.
|
||
|
||
NestedLoopSemiJoinWithDuplicateRemovalIterator solves the problem by doing
|
||
exactly this; it gets a row from the outer side, gets exactly one row from the
|
||
inner side, and then skips over rows from the outer side (_without_ scanning
|
||
the inner side) until its keypart changes.
|
||
*/
|
||
class NestedLoopSemiJoinWithDuplicateRemovalIterator final
|
||
: public RowIterator {
|
||
public:
|
||
NestedLoopSemiJoinWithDuplicateRemovalIterator(
|
||
THD *thd, unique_ptr_destroy_only<RowIterator> source_outer,
|
||
unique_ptr_destroy_only<RowIterator> source_inner, const TABLE *table,
|
||
KEY *key, size_t key_len);
|
||
|
||
bool Init() override;
|
||
|
||
int Read() override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source_outer->SetNullRowFlag(is_null_row);
|
||
m_source_inner->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source_outer->EndPSIBatchModeIfStarted();
|
||
m_source_inner->EndPSIBatchModeIfStarted();
|
||
}
|
||
|
||
void UnlockRow() override {
|
||
m_source_outer->UnlockRow();
|
||
m_source_inner->UnlockRow();
|
||
}
|
||
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source_outer.get(), ""},
|
||
{m_source_inner.get(), ""}};
|
||
}
|
||
|
||
private:
|
||
unique_ptr_destroy_only<RowIterator> const m_source_outer;
|
||
unique_ptr_destroy_only<RowIterator> const m_source_inner;
|
||
|
||
const TABLE *m_table_outer;
|
||
KEY *m_key;
|
||
uchar *m_key_buf; // Owned by the THD's MEM_ROOT.
|
||
const size_t m_key_len;
|
||
bool m_deduplicate_against_previous_row;
|
||
};
|
||
|
||
/**
|
||
WindowingIterator is similar to AggregateIterator, but deals with windowed
|
||
aggregates (i.e., OVER expressions). It deals specifically with aggregates
|
||
that don't need to buffer rows.
|
||
|
||
WindowingIterator always outputs to a temporary table. Similarly to
|
||
AggregateIterator, needs to do some of MaterializeIterator's work in
|
||
copying fields and Items into the destination fields (see AggregateIterator
|
||
for more information).
|
||
*/
|
||
class WindowingIterator final : public RowIterator {
|
||
public:
|
||
WindowingIterator(THD *thd, unique_ptr_destroy_only<RowIterator> source,
|
||
Temp_table_param *temp_table_param, // Includes the window.
|
||
JOIN *join, int output_slice);
|
||
|
||
bool Init() override;
|
||
|
||
int Read() override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
|
||
void UnlockRow() override {
|
||
// There's nothing we can do here.
|
||
}
|
||
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source.get(), ""}};
|
||
}
|
||
|
||
private:
|
||
/// The iterator we are reading from.
|
||
unique_ptr_destroy_only<RowIterator> const m_source;
|
||
|
||
/// Parameters for the temporary table we are outputting to.
|
||
Temp_table_param *m_temp_table_param;
|
||
|
||
/// The window function itself.
|
||
Window *m_window;
|
||
|
||
/// The join we are a part of.
|
||
JOIN *m_join;
|
||
|
||
/// The slice we will be using when reading rows.
|
||
int m_input_slice;
|
||
|
||
/// The slice we will be using when outputting rows.
|
||
int m_output_slice;
|
||
};
|
||
|
||
/**
|
||
BufferingWindowingIterator is like WindowingIterator, but deals with window
|
||
functions that need to buffer rows.
|
||
*/
|
||
class BufferingWindowingIterator final : public RowIterator {
|
||
public:
|
||
BufferingWindowingIterator(
|
||
THD *thd, unique_ptr_destroy_only<RowIterator> source,
|
||
Temp_table_param *temp_table_param, // Includes the window.
|
||
JOIN *join, int output_slice);
|
||
|
||
bool Init() override;
|
||
|
||
int Read() override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_source->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_source->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_source->EndPSIBatchModeIfStarted();
|
||
}
|
||
|
||
void UnlockRow() override {
|
||
// There's nothing we can do here.
|
||
}
|
||
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override {
|
||
return std::vector<Child>{{m_source.get(), ""}};
|
||
}
|
||
|
||
private:
|
||
int ReadBufferedRow(bool new_partition_or_eof);
|
||
|
||
/// The iterator we are reading from.
|
||
unique_ptr_destroy_only<RowIterator> const m_source;
|
||
|
||
/// Parameters for the temporary table we are outputting to.
|
||
Temp_table_param *m_temp_table_param;
|
||
|
||
/// The window function itself.
|
||
Window *m_window;
|
||
|
||
/// The join we are a part of.
|
||
JOIN *m_join;
|
||
|
||
/// The slice we will be using when reading rows.
|
||
int m_input_slice;
|
||
|
||
/// The slice we will be using when outputting rows.
|
||
int m_output_slice;
|
||
|
||
/// If true, we may have more buffered rows to process that need to be
|
||
/// checked for before reading more rows from the source.
|
||
bool m_possibly_buffered_rows;
|
||
|
||
/// Whether the last input row started a new partition, and was tucked away
|
||
/// to finalize the previous partition; if so, we need to bring it back
|
||
/// for processing before we read more rows.
|
||
bool m_last_input_row_started_new_partition;
|
||
|
||
/// Whether we have seen the last input row.
|
||
bool m_eof;
|
||
};
|
||
|
||
/**
|
||
MaterializeInformationSchemaTableIterator makes sure a given I_S temporary
|
||
table is materialized (filled out) before we try to scan it.
|
||
*/
|
||
class MaterializeInformationSchemaTableIterator final : public RowIterator {
|
||
public:
|
||
MaterializeInformationSchemaTableIterator(
|
||
THD *thd, QEP_TAB *qep_tab,
|
||
unique_ptr_destroy_only<RowIterator> table_iterator);
|
||
|
||
bool Init() override;
|
||
int Read() override { return m_table_iterator->Read(); }
|
||
std::vector<std::string> DebugString() const override;
|
||
|
||
std::vector<Child> children() const override {
|
||
// We don't list the table iterator as an explicit child; we mark it in
|
||
// our DebugString() instead. (Anything else would look confusingly much
|
||
// like a join.)
|
||
return {};
|
||
}
|
||
|
||
void SetNullRowFlag(bool is_null_row) override {
|
||
m_table_iterator->SetNullRowFlag(is_null_row);
|
||
}
|
||
|
||
void StartPSIBatchMode() override { m_table_iterator->StartPSIBatchMode(); }
|
||
void EndPSIBatchModeIfStarted() override {
|
||
m_table_iterator->EndPSIBatchModeIfStarted();
|
||
}
|
||
|
||
// The temporary table is private to us, so there's no need to worry about
|
||
// locks to other transactions.
|
||
void UnlockRow() override {}
|
||
|
||
private:
|
||
/// The iterator that reads from the materialized table.
|
||
unique_ptr_destroy_only<RowIterator> m_table_iterator;
|
||
QEP_TAB *m_qep_tab;
|
||
};
|
||
|
||
/**
|
||
Takes in two or more iterators and output rows from them sequentially
|
||
(first all rows from the first one, the all from the second one, etc.).
|
||
Used for implementing UNION ALL, typically together with StreamingIterator.
|
||
*/
|
||
class AppendIterator final : public RowIterator {
|
||
public:
|
||
AppendIterator(
|
||
THD *thd,
|
||
std::vector<unique_ptr_destroy_only<RowIterator>> &&sub_iterators);
|
||
|
||
bool Init() override;
|
||
int Read() override;
|
||
|
||
std::vector<std::string> DebugString() const override { return {"Append"}; }
|
||
std::vector<Child> children() const override;
|
||
|
||
void StartPSIBatchMode() override;
|
||
void EndPSIBatchModeIfStarted() override;
|
||
|
||
void SetNullRowFlag(bool is_null_row) override;
|
||
void UnlockRow() override;
|
||
|
||
private:
|
||
std::vector<unique_ptr_destroy_only<RowIterator>> m_sub_iterators;
|
||
size_t m_current_iterator_index = 0;
|
||
bool m_pfs_batch_mode_enabled = false;
|
||
};
|
||
|
||
#endif // SQL_COMPOSITE_ITERATORS_INCLUDED
|