// polardbxengine/storage/xengine/core/memtable/art.h
/*
* Copyright (c) 2020, Alibaba Group Holding Limited
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "memtable/art_node.h"
#include "xengine/xengine_constants.h"
#include "xengine/slice.h"
#include "logger/log_module.h"
#include "util/coding.h"
#include "util/common.h"
#include "port/likely.h"
#include "xengine/status.h"
#include "memory/base_malloc.h"
#include "xengine/memtablerep.h"
#include "memory/allocator.h"
#include "logger/logger.h"
#include "util/ebr.h"
#include <sched.h>
#include <functional>
#include <cstdio>
#include <queue>
#include <unistd.h>
namespace xengine {
namespace memtable {
/*
 * ART: an Adaptive Radix Tree index used as a memtable representation.
 *
 * Keys live in the trie; values (ARTValue) are additionally chained in a
 * doubly-linked list anchored at dummy_node_, which gives ordered forward
 * and backward iteration without re-walking the trie.  Concurrent readers
 * and writers are coordinated with optimistic lock coupling: readers take
 * version snapshots of nodes and retry on conflict (see yield()).
 */
class ART {
 private:
  ARTNodeBase *root_;                       // root trie node; created in init()
  const MemTableRep::KeyComparator &cmp_;   // key comparator supplied by the memtable
  memory::Allocator *allocator_;            // arena used for ARTValue allocation
  // Sentinel node of the circular doubly-linked ARTValue list; an iterator
  // positioned on dummy_node_ is considered invalid.
  ARTValue dummy_node_;
  std::atomic<uint64_t> memory_usage_;      // bytes charged to this index
  std::atomic<uint64_t> num_entries_;       // number of inserted entries

  /*
   * Back-off helper for the optimistic-lock-coupling retry loops.
   *
   * Since we use the optimistic lock coupling concurrency control method,
   * a thread that keeps losing version validation under high contention
   * should eventually relinquish its CPU time slice (sched_yield) so other
   * threads can make progress; for the first few retries a cheap CPU pause
   * is enough.  Skipping the yield can cause noticeable performance
   * degradation under contention.
   *
   * @param count number of retries performed so far by the caller
   */
  void yield(int count) const {
    if (count > 3) {
      // Retried several times already: give up the time slice.
      sched_yield();
    } else {
      // Early retries: spin-wait hint to the CPU (cheap, keeps the slice).
      PAUSE();
    }
  }

  // Computes the shared prefix of lhs/rhs starting at byte_pos, up to limit.
  void calc_prefix(common::Slice &lhs, common::Slice &rhs, uint32_t &byte_pos, uint32_t limit);
  // Allocates a new inner node holding the given compressed prefix.
  int alloc_newnode(ARTNodeBase *&new_node, const uint8_t *prefix, uint16_t prefix_len);
  // Links insert_artvalue into the ordered value list after precursor.
  void insert_into_value_list(ARTValue *insert_artvalue, ARTValue *precursor, common::Slice &insert_key);

  // List-scan helpers used to position iterators relative to `target`.
  ARTValue *scan_backward_for_less_than(ARTValue *start, const common::Slice &target) const;
  ARTValue *scan_backward_for_greater_than_or_equal(ARTValue *start, const common::Slice &target) const;
  ARTValue *scan_forward_for_greater_than_or_equal(ARTValue *start, const common::Slice &target) const;

  // Find the boundary values of a subtrie; the *_wrap variants handle the
  // retry loop around the optimistic version snapshots (p_node /
  // p_version_snapshot are the parent's lock-coupling state).
  int largest_artvalue_in_subtrie(void *root, ARTValue *&largest_artvalue,
                                  void *p_node, uint64_t p_version_snapshot) const;
  int smallest_artvalue_in_subtrie(void *root, ARTValue *&smallest_artvalue,
                                   void *p_node, uint64_t p_version_snapshot) const;
  int largest_artvalue_in_subtrie_wrap(void *root, ARTValue *&largest_artvalue) const;
  int smallest_artvalue_in_subtrie_wrap(void *root, ARTValue *&smallest_artvalue) const;

  // Insertion paths.  "overflow" = child table of curr_node may need to grow;
  // "split_prefix" = the compressed prefix of an inner node must be split;
  // "split_artvalue" = a leaf must be split to disambiguate two keys.  The
  // "no_differentiable_suffix" variants handle the case where one key is a
  // prefix of the other.  All take version snapshots for validation/retry.
  int insert_maybe_overflow(uint8_t curr_key, ARTValue *insert_artvalue, ARTNodeBase *curr_node, uint64_t curr_version_snapshot,
                            uint8_t parent_key, ARTNodeBase *parent_node, uint64_t parent_version_snapshot);
  int split_prefix_with_normal_case(common::Slice &insert_key, uint32_t start_byte_pos, uint32_t shared_prefix,
                                    ARTValue *insert_artvalue, ARTNodeBase *curr_node, uint64_t current_version_snapshot,
                                    uint8_t parent_key, ARTNodeBase *parent_node, uint64_t parent_version_snapshot);
  int split_prefix_with_no_differentiable_suffix(common::Slice &insert_key, uint32_t start_byte_pos, uint32_t shared_prefix,
                                                 ARTValue *insert_artvalue, ARTNodeBase *curr_node, uint64_t current_version_snapshot,
                                                 uint8_t parent_key, ARTNodeBase *parent_node, uint64_t parent_version_snapshot);
  int insert_maybe_split_artvalue(common::Slice &insert_key, ARTValue *insert_artvalue, uint32_t start_byte_pos,
                                  uint8_t curr_key, ARTNodeBase *curr_node, uint64_t curr_version_snapshot,
                                  ARTNodeBase *parent_node, uint64_t parent_version_snapshot);
  int split_artvalue_with_normal_case(common::Slice &insert_key, ARTValue *insert_artvalue, uint32_t start_byte_pos, uint32_t shared_prefix,
                                      common::Slice &split_key, ARTValue *split_artvalue, uint8_t curr_key, ARTNodeBase *curr_node);
  int split_artvalue_with_no_differentiable_suffix(common::Slice &insert_key, ARTValue *insert_artvalue,
                                                   uint32_t start_byte_pos, uint32_t shared_prefix,
                                                   common::Slice &split_key, ARTValue *split_artvalue,
                                                   uint8_t curr_key, ARTNodeBase *curr_node);
  int insert_with_no_split_artvalue(common::Slice &insert_key, ARTValue *insert_artvalue, common::Slice &split_key, ARTValue *split_artvalue,
                                    uint8_t curr_key, ARTNodeBase *curr_node);

  // Single-attempt implementations; the public wrappers retry these.
  int insert_inner(ARTValue *insert_artvalue);
  int estimate_lower_bound_count_inner(const common::Slice &target, int64_t &count) const;
  int lower_bound_inner(const common::Slice &target, ARTValue *&smallest_successor) const;

 public:
  ART(const MemTableRep::KeyComparator &cmp, memory::Allocator *allocator)
      : root_(nullptr),
        cmp_(cmp),
        allocator_(allocator),
        dummy_node_(),
        memory_usage_(0),
        num_entries_(0) {}

  /*
   * ARTValue memory comes from the memtable's pooled allocator and is
   * reclaimed with the pool, not here; the destructor only releases the
   * trie node structure rooted at root_ (RELEASE presumably frees the
   * node tree -- see memory/base_malloc.h).
   */
  ~ART() { RELEASE(root_); }

  // Allocates the root node; must be called before any other operation.
  int init();

  const MemTableRep::KeyComparator &comparator() const { return cmp_; }

  // Sentinel of the ordered value list (see dummy_node_ above).
  ARTValue *dummy_node() { return &dummy_node_; }

  /*
   * Allocates an uninitialized ARTValue with room for kv_len bytes of
   * key/value payload stored inline right after the ARTValue header
   * (hence sizeof(ARTValue) + kv_len).
   */
  ARTValue *allocate_art_value(int32_t kv_len) {
    char *raw = allocator_->AllocateAligned(sizeof(ARTValue) + kv_len);
    return new (raw) ARTValue();
  }

  void insert(ARTValue *art_value);
  // Positions smallest_successor on the first entry >= target (dummy node
  // when no such entry exists).
  void lower_bound(const common::Slice &target, ARTValue *&smallest_successor) const;
  void estimate_lower_bound_count(const common::Slice &target, int64_t &count) const;
  uint64_t estimate_count(const common::Slice &start_ikey, const common::Slice &end_ikey) const;
  bool contains(const char *target) const;

  // const: only performs an atomic load (atomic<T>::load is const).
  size_t approximate_memory_usage() const { return memory_usage_.load(); }

  // Deferred-reclamation callback handed to the EBR subsystem: destroys a
  // retired trie node once no reader can still reference it.
  static void ebr_delete(void *p) {
    ARTNodeBase *node = reinterpret_cast<ARTNodeBase *>(p);
    MOD_DELETE_OBJECT(ARTNodeBase, node);
  }

  // Debug helper: prints the trie structure.
  void dump_art_struct();

  /*
   * Ordered iterator over the value list.  Navigation (next/prev/seek_to_*)
   * walks the doubly-linked ARTValue list; seek() uses the trie via
   * lower_bound().  An iterator parked on the dummy node is invalid.
   */
  class Iterator {
   private:
    ART *art_;
    ARTValue *curr_artvalue_;

   public:
    explicit Iterator(ART *index) : art_(index), curr_artvalue_(index->dummy_node()) {}

    // Rebinds the iterator to another index and resets its position.
    void set_art(ART *index) {
      art_ = index;
      curr_artvalue_ = index->dummy_node();
    }

    const char *entry() const { return curr_artvalue_->entry(); }
    common::Slice key() { return curr_artvalue_->key(); }
    common::Slice value() { return curr_artvalue_->value(); }

    bool valid() const { return curr_artvalue_ != art_->dummy_node(); }

    void seek_to_first() { curr_artvalue_ = art_->dummy_node()->next(); }
    void seek_to_last() { curr_artvalue_ = art_->dummy_node()->prev(); }

    void next() {
      assert(valid());
      curr_artvalue_ = curr_artvalue_->next();
    }
    void prev() {
      assert(valid());
      curr_artvalue_ = curr_artvalue_->prev();
    }

    // Positions on the first entry >= target; invalid if none exists.
    void seek(const common::Slice &target) {
      art_->lower_bound(target, curr_artvalue_);
    }
    void seek(const char *target) {
      common::Slice key = GetLengthPrefixedSlice(target);
      art_->lower_bound(key, curr_artvalue_);
    }

    // Positions on the last entry <= target; invalid if none exists.
    void seek_for_prev(const common::Slice &target) {
      seek(target);
      if (!valid()) {
        // Everything is smaller than target: start from the largest entry.
        seek_to_last();
      }
      // Step back over entries still greater than target.
      while (valid() && art_->comparator()(curr_artvalue_->entry(), target) > 0) {
        prev();
      }
    }
    void seek_for_prev(const char *target) {
      common::Slice key = GetLengthPrefixedSlice(target);
      seek_for_prev(key);
    }
  };
};
} // namespace memtable
} // namespace xengine