polardbxengine/sql/sort_param.h

/* Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */

#ifndef SORT_PARAM_INCLUDED
#define SORT_PARAM_INCLUDED

#include "field_types.h"   // enum_field_types
#include "my_base.h"       // ha_rows
#include "my_byteorder.h"  // uint2korr
#include "my_dbug.h"
#include "my_inttypes.h"
#include "my_io.h"          // mysql_com.h needs my_socket
#include "mysql_com.h"      // Item_result
#include "sql/sql_array.h"  // Bounds_checked_array
#include "sql/sql_const.h"
#include "sql/sql_sort.h"  // Filesort_info
#include "sql/thr_malloc.h"
#include "sql_string.h"

class Field;
class Filesort;
class Item;
struct TABLE;

enum class Addon_fields_status {
  unknown_status,
  using_addon_fields,

  // The remainder are reasons why we are _not_ using addon fields.
  fulltext_searched,
  keep_rowid,
  row_not_packable,
  row_contains_blob,
  max_length_for_sort_data,
  row_too_large,
  skip_heuristic,
  using_priority_queue
};

inline const char *addon_fields_text(Addon_fields_status afs) {
  switch (afs) {
    default:
      return "unknown";
    case Addon_fields_status::using_addon_fields:
      return "using_addon_fields";
    case Addon_fields_status::fulltext_searched:
      return "fulltext_searched";
    case Addon_fields_status::keep_rowid:
      return "keep_rowid";
    case Addon_fields_status::row_not_packable:
      return "row_not_packable";
    case Addon_fields_status::row_contains_blob:
      return "row_contains_blob";
    case Addon_fields_status::max_length_for_sort_data:
      return "max_length_for_sort_data";
    case Addon_fields_status::row_too_large:
      return "row_too_large";
    case Addon_fields_status::skip_heuristic:
      return "skip_heuristic";
    case Addon_fields_status::using_priority_queue:
      return "using_priority_queue";
  }
}

/* Structs used when sorting */

/// Struct that holds information about a sort field.
struct st_sort_field {
  Field *field;  ///< Field to sort
  Item *item;    ///< Item if not sorting fields

  /// Length of sort field. Beware, can be 0xFFFFFFFFu (infinite)!
  uint length;

  uint suffix_length;           ///< Length suffix (0-4)
  Item_result result_type;      ///< Type of item (not used for fields)
  enum_field_types field_type;  ///< Field type of the field or item
  bool reverse;                 ///< if descending sort
  bool is_varlen;               ///< If key part has variable length
  bool maybe_null;              ///< If key part is nullable
};

/**
  The structure Sort_addon_field describes the layout
  for field values appended to sorted values in records to be sorted
  in the sort buffer.

  Null bit maps for the appended values is placed before the values
  themselves. Offsets are from the last sorted field.

  The structure is used to store values of the additional fields
  in the sort buffer. It is used also when these values are read
  from a temporary file/buffer in 'Filesort_info::unpack_addon_fields'.
*/

struct Sort_addon_field { /* Sort addon packed field */
  Field *field;           /* Original field */
  uint offset;            /* Offset from the last sorted field */
  uint null_offset;       /* Offset to to null bit from the last sorted field */
  uint max_length;        /* Maximum length in the sort buffer */
  uint8 null_bit;         /* Null bit mask for the field */
};

typedef Bounds_checked_array<Sort_addon_field> Addon_fields_array;

/**
  This class wraps information about usage of addon fields.
  An Addon_fields object is used both during packing of data in the filesort
  buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.

  @see documentation for the Sort_addon_field struct.
  @see documentation for get_addon_fields()
 */
class Addon_fields {
 public:
  Addon_fields(Addon_fields_array arr)
      : m_field_descriptors(arr),
        m_addon_buf(NULL),
        m_addon_buf_length(0),
        m_using_packed_addons(false) {
    DBUG_ASSERT(!arr.is_null());
  }

  Sort_addon_field *begin() { return m_field_descriptors.begin(); }
  Sort_addon_field *end() { return m_field_descriptors.end(); }
  size_t num_field_descriptors() const { return m_field_descriptors.size(); }

  /// SortFileIterator needs an extra buffer when unpacking.
  uchar *allocate_addon_buf(uint sz) {
    if (m_addon_buf != NULL) {
      DBUG_ASSERT(m_addon_buf_length == sz);
      return m_addon_buf;
    }
    m_addon_buf = static_cast<uchar *>((*THR_MALLOC)->Alloc(sz));
    if (m_addon_buf) m_addon_buf_length = sz;
    return m_addon_buf;
  }

  uchar *get_addon_buf() { return m_addon_buf; }
  uint get_addon_buf_length() const { return m_addon_buf_length; }

  void set_using_packed_addons(bool val) { m_using_packed_addons = val; }

  bool using_packed_addons() const { return m_using_packed_addons; }

  static bool can_pack_addon_fields(uint record_length) {
    return (record_length <= (0xFFFF));
  }

  /**
    @returns Total number of bytes used for packed addon fields.
    the size of the length field + size of null bits + sum of field sizes.
   */
  static uint read_addon_length(uchar *p) {
    return size_of_length_field + uint2korr(p);
  }

  /**
    Stores the number of bytes used for packed addon fields.
   */
  static void store_addon_length(uchar *p, uint sz) {
    // We actually store the length of everything *after* the length field.
    int2store(p, sz - size_of_length_field);
  }

  static const uint size_of_length_field = 2;

 private:
  Addon_fields_array m_field_descriptors;

  uchar *m_addon_buf;          ///< Buffer for unpacking addon fields.
  uint m_addon_buf_length;     ///< Length of the buffer.
  bool m_using_packed_addons;  ///< Are we packing the addon fields?
};

/**
  There are several record formats for sorting:
@verbatim
    |<key a><key b>...    | <rowid> |
    / m_fixed_sort_length / ref_len /
@endverbatim

  or with "addon fields"
@verbatim
    |<key a><key b>...    |<null bits>|<field a><field b>...|
    / m_fixed_sort_length /         addon_length            /
@endverbatim

  The packed format for "addon fields"
@verbatim
    |<key a><key b>...    |<length>|<null bits>|<field a><field b>...|
    / m_fixed_sort_length /         addon_length                     /
@endverbatim

  All the formats above have fixed-size keys, with appropriate padding.
  Fixed-size keys can be compared/sorted using memcmp().

  The packed (variable length) format for keys:
@verbatim
    |<keylen>|<varkey a><key b>...<hash>|<rowid>  or <addons>     |
    / 4 bytes/   keylen bytes           / ref_len or addon_length /
@endverbatim

  This format is currently only used if we are sorting JSON data.
  Variable-size keys must be compared piece-by-piece, using type information
  about each individual key part, @see cmp_varlen_keys.

  All the record formats consist of a (possibly composite) key,
  followed by a (possibly composite) payload.
  The key is used for sorting data. Once sorting is done, the payload is
  stored in some buffer, and read by some RowIterator.

  For fixed-size keys, with @<rowid@> payload, the @<rowid@> is also
  considered to be part of the key.

<dl>
<dt>@<key@>
          <dd>  Fields are fixed-size, specially encoded with
                Field::make_sort_key() so we can do byte-by-byte compare.
<dt>@<length@>
          <dd>  Contains the *actual* packed length (after packing) of
                everything after the sort keys.
                The size of the length field is 2 bytes,
                which should cover most use cases: addon data <= 65535 bytes.
                This is the same as max record size in MySQL.
<dt>@<null bits@>
          <dd>  One bit for each nullable field, indicating whether the field
                is null or not. May have size zero if no fields are nullable.
<dt>@<field xx@>
          <dd>  Are stored with field->pack(), and retrieved with
                field->unpack().
                Addon fields within a record are stored consecutively, with no
                "holes" or padding. They will have zero size for NULL values.
<dt>@<keylen@>
          <dd>  Contains the *actual* packed length of all the keys.
                We may have an arbitrary mix of fixed and variable-sized keys.
<dt>@<hash@>
          <dd>  Optional 8 byte hash, used for GROUPing of JSON values.
<dt>@<varkey@>
          <dd>  Used for JSON and variable-length string values, the format is:
</dl>
@verbatim
                |<null value>|<key length>|<sort key>        |
                / 1 byte     /   4 bytes  / key length bytes /
@endverbatim
<dl>
<dt>@<null value@>
          <dd>  0x00 for NULL. 0xff for NULL under DESC sort. 0x01 for NOT NULL.
<dt>@<key length@>
          <dd>  The length of the sort key, *including* the four bytes for the
                key length. Does not exist if the field is NULL.
</dl>
 */
class Sort_param {
  uint m_fixed_rec_length{0};   ///< Maximum length of a record, see above.
  uint m_fixed_sort_length{0};  ///< Maximum number of bytes used for sorting.
 public:
  uint ref_length{0};        // Length of record ref.
  uint m_addon_length{0};    // Length of added packed fields.
  uint fixed_res_length{0};  // Length of records in final sorted file/buffer.
  uint max_rows_per_buffer{0};  // Max (unpacked) rows / buffer.
  ha_rows max_rows{0};          // Select limit, or HA_POS_ERROR if unlimited.
  TABLE *sort_form{nullptr};    // For quicker make_sortkey.
  bool use_hash{false};         // Whether to use hash to distinguish cut JSON
  bool m_force_stable_sort{false};  // Keep relative order of equal elements
  bool m_remove_duplicates{
      false};  ///< Whether we want to remove duplicate rows

  /// If we are removing duplicate rows and merging, contains a buffer where we
  /// can store the last key seen.
  uchar *m_last_key_seen{nullptr};

  /**
    ORDER BY list with some precalculated info for filesort.
    Array is created and owned by a Filesort instance.
   */
  Bounds_checked_array<st_sort_field> local_sortorder;

  Addon_fields *addon_fields{nullptr};  ///< Descriptors for addon fields.
  bool using_pq{false};
  StringBuffer<STRING_BUFFER_USUAL_SIZE> tmp_buffer;

  /// Decide whether we are to use addon fields (sort rows instead of sorting
  /// row IDs or not). See using_addon_fields().
  void decide_addon_fields(Filesort *file_sort, TABLE *table,
                           ulong max_length_for_sort_data, bool sort_positions);

  /**
    Initialize this struct for filesort() usage.
    @see description of record layout above
    @param [in,out] file_sort sorting information which may be re-used on
                              subsequent invocations of filesort()
    @param sf_array  initialization value for local_sortorder
    @param sortlen   length of sorted columns
    @param table     table to be sorted
    @param max_length_for_sort_data from thd->variables
    @param maxrows   HA_POS_ERROR or possible LIMIT value
    @param remove_duplicates if true, items with duplicate keys will be removed
  */
  void init_for_filesort(Filesort *file_sort,
                         Bounds_checked_array<st_sort_field> sf_array,
                         uint sortlen, TABLE *table,
                         ulong max_length_for_sort_data, ha_rows maxrows,
                         bool remove_duplicates);

  /// Enables the packing of addons if possible.
  void try_to_pack_addons(ulong max_length_for_sort_data);

  /// Are we packing the "addon fields"?
  bool using_packed_addons() const {
    DBUG_ASSERT(m_using_packed_addons ==
                (addon_fields != NULL && addon_fields->using_packed_addons()));
    return m_using_packed_addons;
  }

  /// Are we using varlen key fields?
  bool using_varlen_keys() const { return m_num_varlen_keys > 0; }

  /// Are we using any JSON key fields?
  bool using_json_keys() const { return m_num_json_keys > 0; }

  /// Are we using "addon fields"? Note that decide_addon_fields() or
  /// init_for_filesort() must be called before checking this.
  bool using_addon_fields() const { return addon_fields != NULL; }

  /**
    Stores key fields in *dst.
    Then appends either *ref_pos (the @<rowid@>) or the "addon fields".
    @param  dst     out Where to store the result
    @param  ref_pos in  Where to find the @<rowid@>
    @returns Number of bytes stored, or UINT_MAX if the result could not
      provably fit within the destination buffer.
   */
  uint make_sortkey(Bounds_checked_array<uchar> dst, const uchar *ref_pos);

  // Adapter for Bounded_queue.
  uint make_sortkey(uchar *dst, size_t dst_len, const uchar *ref_pos) {
    return make_sortkey(Bounds_checked_array<uchar>(dst, dst_len), ref_pos);
  }

  /// Stores the length of a variable-sized key.
  static void store_varlen_key_length(uchar *p, uint sz) { int4store(p, sz); }

  /// Skips the key part, and returns address of payload.
  uchar *get_start_of_payload(uchar *p) const {
    size_t offset = using_varlen_keys() ? uint4korr(p) : max_compare_length();
    if (!using_addon_fields() && !using_varlen_keys())
      offset -= ref_length;  // The reference is also part of the sort key.
    return p + offset;
  }

  /**
    Skips the key part, and returns address of payload.
    For SortBufferIterator, which does not have access to Sort_param.
   */
  static uchar *get_start_of_payload(uint default_val, bool is_varlen,
                                     uchar *p) {
    size_t offset = is_varlen ? uint4korr(p) : default_val;
    return p + offset;
  }

  /// @returns The number of bytes used for sorting of fixed-size keys.
  uint max_compare_length() const { return m_fixed_sort_length; }

  void set_max_compare_length(uint len) { m_fixed_sort_length = len; }

  /// @returns The actual size of a record (key + addons)
  size_t get_record_length(uchar *p) const;

  /// @returns The maximum size of a record (key + addons)
  uint max_record_length() const { return m_fixed_rec_length; }

  void set_max_record_length(uint len) { m_fixed_rec_length = len; }

  /**
    Getter for record length and result length.
    @param record_start Pointer to record.
    @param [out] recl   Store record length here.
    @param [out] resl   Store result length here.
   */
  void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl);

  enum enum_sort_algorithm {
    FILESORT_ALG_NONE,
    FILESORT_ALG_STD_SORT,
    FILESORT_ALG_STD_STABLE
  };
  enum_sort_algorithm m_sort_algorithm{FILESORT_ALG_NONE};

  Addon_fields_status m_addon_fields_status{
      Addon_fields_status::unknown_status};

  static const uint size_of_varlength_field = 4;

 private:
  /// Counts number of varlen keys
  int count_varlen_keys() const;
  /// Counts number of JSON keys
  int count_json_keys() const;

  /// total length of fields which have a packable type
  uint m_packable_length{0};
  /// caches the value of using_packed_addons()
  bool m_using_packed_addons{false};
  int m_num_varlen_keys{0};  ///< number of varlen keys
  int m_num_json_keys{0};    ///< number of JSON keys

 public:
  Sort_param() = default;
  // Not copyable.
  Sort_param(const Sort_param &) = delete;
  Sort_param &operator=(const Sort_param &) = delete;
};

inline uchar *get_start_of_payload(const Filesort_info *fsi, uchar *p) {
  return Sort_param::get_start_of_payload(fsi->sort_length(),
                                          fsi->using_varlen_keys(), p);
}

/// Are we using "packed addon fields"?
inline bool using_packed_addons(const Filesort_info *fsi) {
  return fsi->addon_fields != nullptr &&
         fsi->addon_fields->using_packed_addons();
}

#endif  // SORT_PARAM_INCLUDED