polardbxengine/plugin/innodb_memcached/daemon_memcached/engines/default_engine/items.c

1208 lines
40 KiB
C

/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#include "config.h"
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#include <inttypes.h>
#include "default_engine.h"
/*
 * Forward declarations of the file-local helpers defined below.
 *
 * The do_* functions do not take engine->cache_lock themselves; the public
 * wrappers in this file (item_alloc, item_get, item_release, item_unlink,
 * store_item, arithmetic, ...) acquire it before calling them.
 */
static void item_link_q(struct default_engine *engine, hash_item *it);
static void item_unlink_q(struct default_engine *engine, hash_item *it);
static hash_item *do_item_alloc(struct default_engine *engine,
const void *key, const size_t nkey,
const int flags, const rel_time_t exptime,
const int nbytes,
const void *cookie);
static hash_item *do_item_get(struct default_engine *engine,
const char *key, const size_t nkey);
static int do_item_link(struct default_engine *engine, hash_item *it);
static void do_item_unlink(struct default_engine *engine, hash_item *it);
static void do_item_release(struct default_engine *engine, hash_item *it);
static void do_item_update(struct default_engine *engine, hash_item *it);
static int do_item_replace(struct default_engine *engine,
hash_item *it, hash_item *new_it);
static void item_free(struct default_engine *engine, hash_item *it);
/*
 * We only reposition items in the LRU queue if they haven't been repositioned
 * in this many seconds. That saves us from churning on frequently-accessed
 * items. Used by do_item_update().
 */
#define ITEM_UPDATE_INTERVAL 60
/*
 * To avoid scanning through the complete cache in some circumstances we'll
 * just give up and return an error after inspecting a fixed number of objects.
 * This is the per-pass limit used by the tail scans in do_item_alloc() and
 * do_item_stats().
 */
static const int search_items = 50;
/*
 * Clear every per-slab-class item statistic counter. The counters are zeroed
 * while the cache lock is held.
 */
void item_stats_reset(struct default_engine *engine) {
    pthread_mutex_t *lock = &engine->cache_lock;

    pthread_mutex_lock(lock);
    memset(engine->items.itemstats, 0, sizeof(engine->items.itemstats));
    pthread_mutex_unlock(lock);
}
/*
 * Total number of bytes an item accounts for: the hash_item header, the key
 * and the value, plus room for a 64-bit CAS value when CAS is enabled in the
 * engine configuration.
 */
static inline size_t ITEM_ntotal(struct default_engine *engine,
                                 const hash_item *item) {
    const size_t cas_bytes = engine->config.use_cas ? sizeof(uint64_t) : 0;

    return sizeof(*item) + item->nkey + item->nbytes + cas_bytes;
}
/*
 * Hand out the next CAS id for a new item. Ids start at 1 and grow by one
 * per call; the counter is a function-local static with no locking of its
 * own.
 */
static uint64_t get_cas_id(void) {
    static uint64_t last_cas = 0;

    last_cas += 1;
    return last_cas;
}
/*
 * Enable this for reference-count debugging.
 *
 * When enabled, DEBUG_REFCNT(it, op) prints the item address, the operation
 * character, the current refcount and one flag letter each for ITEM_LINKED
 * ('L') and ITEM_SLABBED ('S'). When disabled it expands to an empty loop so
 * that "DEBUG_REFCNT(it, op);" is still a valid statement.
 *
 * The format string has exactly one conversion per argument, the pointer is
 * printed with %p, and the flags are read from iflag (the field the rest of
 * this file uses).
 */
#if 0
# define DEBUG_REFCNT(it,op) \
    fprintf(stderr, "item %p refcnt(%c) %d %c%c\n", \
            (void *)(it), (op), (int)(it)->refcount, \
            ((it)->iflag & ITEM_LINKED) ? 'L' : ' ', \
            ((it)->iflag & ITEM_SLABBED) ? 'S' : ' ')
#else
# define DEBUG_REFCNT(it,op) while(0)
#endif
/*
 * Allocate and initialise a new, unlinked item for the given key.
 *
 * The required size is sizeof(hash_item) + nkey + nbytes, plus 8 bytes when
 * CAS is enabled. Memory is obtained by trying, in order:
 *   1. reusing an expired, unreferenced item found within the last
 *      search_items entries of the slab class' LRU tail;
 *   2. slabs_alloc();
 *   3. if config.evict_to_free is set, unlinking the first unreferenced item
 *      found near the tail (counted as an eviction if it had not expired)
 *      and calling slabs_alloc() again;
 *   4. "tail repair": forcibly dropping a still-referenced tail item whose
 *      timestamp is older than TAIL_REPAIR_TIME, then slabs_alloc() once more.
 *
 * engine   engine instance; callers in this file hold engine->cache_lock
 * key/nkey key bytes, copied into the new item
 * flags    stored in it->flags
 * exptime  stored in it->exptime
 * nbytes   size of the value area; the value itself is NOT written here
 * cookie   only forwarded to the server's "evicting" stat callback
 *
 * Returns the new item with refcount == 1 (the caller's reference), or NULL
 * when no slab class fits the size or no memory could be obtained.
 */
/*@null@*/
hash_item *do_item_alloc(struct default_engine *engine,
const void *key,
const size_t nkey,
const int flags,
const rel_time_t exptime,
const int nbytes,
const void *cookie) {
hash_item *it = NULL;
size_t ntotal = sizeof(hash_item) + nkey + nbytes;
if (engine->config.use_cas) {
ntotal += sizeof(uint64_t);
}
/* A class id of 0 means no slab class can hold an object of this size. */
unsigned int id = slabs_clsid(engine, ntotal);
if (id == 0)
return 0;
/* do a quick check if we have any expired items in the tail.. */
int tries = search_items;
hash_item *search;
rel_time_t current_time = engine->server.core->get_current_time();
for (search = engine->items.tails[id];
tries > 0 && search != NULL;
tries--, search=search->prev) {
if (search->refcount == 0 &&
(search->exptime != 0 && search->exptime < current_time)) {
it = search;
/* I don't want to actually free the object, just steal
* the item to avoid to grab the slab mutex twice ;-)
*/
pthread_mutex_lock(&engine->stats.lock);
engine->stats.reclaimed++;
pthread_mutex_unlock(&engine->stats.lock);
engine->items.itemstats[id].reclaimed++;
/* Hold a temporary reference so do_item_unlink() does not free the item. */
it->refcount = 1;
slabs_adjust_mem_requested(engine, it->slabs_clsid, ITEM_ntotal(engine, it), ntotal);
do_item_unlink(engine, it);
/* Initialize the item block: */
it->slabs_clsid = 0;
it->refcount = 0;
break;
}
}
if (it == NULL && (it = slabs_alloc(engine, ntotal, id)) == NULL) {
/*
** Could not find an expired item at the tail, and memory allocation
** failed. Try to evict some items!
*/
tries = search_items;
/* If requested to not push old items out of cache when memory runs out,
* we're out of luck at this point...
*/
if (engine->config.evict_to_free == 0) {
engine->items.itemstats[id].outofmemory++;
return NULL;
}
/*
* try to get one off the right LRU
* don't necessarily unlink the tail because it may be locked: refcount>0
* search up from tail an item with refcount==0 and unlink it; give up after search_items
* tries
*/
if (engine->items.tails[id] == 0) {
engine->items.itemstats[id].outofmemory++;
return NULL;
}
for (search = engine->items.tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
if (search->refcount == 0) {
/* Still-valid item: this is a real eviction. Otherwise it had expired
* and is only counted as reclaimed. */
if (search->exptime == 0 || search->exptime > current_time) {
engine->items.itemstats[id].evicted++;
engine->items.itemstats[id].evicted_time = current_time - search->time;
if (search->exptime != 0) {
engine->items.itemstats[id].evicted_nonzero++;
}
pthread_mutex_lock(&engine->stats.lock);
engine->stats.evictions++;
pthread_mutex_unlock(&engine->stats.lock);
engine->server.stat->evicting(cookie,
item_get_key(search),
search->nkey);
} else {
engine->items.itemstats[id].reclaimed++;
pthread_mutex_lock(&engine->stats.lock);
engine->stats.reclaimed++;
pthread_mutex_unlock(&engine->stats.lock);
}
/* refcount is 0, so this also returns the item's memory via item_free(). */
do_item_unlink(engine, search);
break;
}
}
it = slabs_alloc(engine, ntotal, id);
if (it == 0) {
engine->items.itemstats[id].outofmemory++;
/* Last ditch effort. There is a very rare bug which causes
* refcount leaks. We've fixed most of them, but it still happens,
* and it may happen in the future.
* We can reasonably assume no item can stay locked for more than
* three hours, so if we find one in the tail which is that old,
* free it anyway.
*/
tries = search_items;
for (search = engine->items.tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
if (search->refcount != 0 && search->time + TAIL_REPAIR_TIME < current_time) {
engine->items.itemstats[id].tailrepairs++;
/* Drop the (presumed leaked) references so the unlink frees the item. */
search->refcount = 0;
do_item_unlink(engine, search);
break;
}
}
it = slabs_alloc(engine, ntotal, id);
if (it == 0) {
return NULL;
}
}
}
/* Both fresh slab memory and a stolen item arrive here with slabs_clsid == 0. */
assert(it->slabs_clsid == 0);
it->slabs_clsid = id;
assert(it != engine->items.heads[it->slabs_clsid]);
it->next = it->prev = it->h_next = 0;
it->refcount = 1; /* the caller will have a reference */
DEBUG_REFCNT(it, '*');
it->iflag = engine->config.use_cas ? ITEM_WITH_CAS : 0;
it->nkey = nkey;
it->nbytes = nbytes;
it->flags = flags;
memcpy((void*)item_get_key(it), key, nkey);
it->exptime = exptime;
return it;
}
/*
 * Hand an item's memory back to the slab allocator. The item must already be
 * unlinked, must not be the head or tail of its LRU queue, and must have no
 * outstanding references (all checked by assertions).
 */
static void item_free(struct default_engine *engine, hash_item *it) {
    const size_t nbytes_total = ITEM_ntotal(engine, it);

    assert((it->iflag & ITEM_LINKED) == 0);
    assert(it != engine->items.heads[it->slabs_clsid]);
    assert(it != engine->items.tails[it->slabs_clsid]);
    assert(it->refcount == 0);

    /* so slab size changer can tell later if item is already free or not */
    const unsigned int owning_class = it->slabs_clsid;
    it->slabs_clsid = 0;
    it->iflag |= ITEM_SLABBED;
    DEBUG_REFCNT(it, 'F');

    slabs_free(engine, it, nbytes_total, owning_class);
}
/*
 * Insert an item at the head of the LRU queue of its slab class and bump the
 * class' item count. If the queue was empty the item also becomes the tail.
 */
static void item_link_q(struct default_engine *engine, hash_item *it) {
    assert(it->slabs_clsid < POWER_LARGEST);
    assert((it->iflag & ITEM_SLABBED) == 0);

    hash_item **headp = &engine->items.heads[it->slabs_clsid];
    hash_item **tailp = &engine->items.tails[it->slabs_clsid];

    assert(it != *headp);
    assert((*headp && *tailp) || (*headp == 0 && *tailp == 0));

    hash_item *old_head = *headp;

    it->prev = 0;
    it->next = old_head;
    if (old_head) {
        old_head->prev = it;
    }
    *headp = it;
    if (*tailp == 0) {
        *tailp = it;
    }
    engine->items.sizes[it->slabs_clsid]++;
}
/*
 * Remove an item from the LRU queue of its slab class, fixing up the queue's
 * head/tail pointers and the neighbours' links, and decrement the class'
 * item count. The item's own next/prev pointers are left untouched.
 */
static void item_unlink_q(struct default_engine *engine, hash_item *it) {
    assert(it->slabs_clsid < POWER_LARGEST);

    hash_item **headp = &engine->items.heads[it->slabs_clsid];
    hash_item **tailp = &engine->items.tails[it->slabs_clsid];

    if (it == *headp) {
        assert(it->prev == 0);
        *headp = it->next;
    }
    if (it == *tailp) {
        assert(it->next == 0);
        *tailp = it->prev;
    }
    assert(it->next != it);
    assert(it->prev != it);

    /* Splice the neighbours together around the departing item. */
    if (it->next) {
        it->next->prev = it->prev;
    }
    if (it->prev) {
        it->prev->next = it->next;
    }
    engine->items.sizes[it->slabs_clsid]--;
}
/*
 * Make an item visible in the cache: mark it linked, timestamp it, insert it
 * into the hash table, account for it in the global stats, give it a fresh
 * CAS id and put it at the head of its LRU queue. Always returns 1.
 */
int do_item_link(struct default_engine *engine, hash_item *it) {
    MEMCACHED_ITEM_LINK(item_get_key(it), it->nkey, it->nbytes);
    assert((it->iflag & (ITEM_LINKED|ITEM_SLABBED)) == 0);
    assert(it->nbytes < (1024 * 1024));  /* 1MB max size */

    it->iflag |= ITEM_LINKED;
    it->time = engine->server.core->get_current_time();
    assoc_insert(engine,
                 engine->server.core->hash(item_get_key(it), it->nkey, 0),
                 it);

    pthread_mutex_lock(&engine->stats.lock);
    engine->stats.curr_bytes += ITEM_ntotal(engine, it);
    engine->stats.curr_items++;
    engine->stats.total_items++;
    pthread_mutex_unlock(&engine->stats.lock);

    /* Allocate a new CAS ID on link. */
    item_set_cas(NULL, NULL, it, get_cas_id());

    item_link_q(engine, it);

    return 1;
}
/*
 * Take an item out of the cache if it is currently linked: clear the linked
 * flag, update the global stats, remove it from the hash table and from its
 * LRU queue. The memory is released right away when nobody holds a
 * reference. Does nothing for an item that is not linked.
 */
void do_item_unlink(struct default_engine *engine, hash_item *it) {
    MEMCACHED_ITEM_UNLINK(item_get_key(it), it->nkey, it->nbytes);

    if ((it->iflag & ITEM_LINKED) == 0) {
        return;
    }

    it->iflag &= ~ITEM_LINKED;

    pthread_mutex_lock(&engine->stats.lock);
    engine->stats.curr_bytes -= ITEM_ntotal(engine, it);
    engine->stats.curr_items -= 1;
    pthread_mutex_unlock(&engine->stats.lock);

    assoc_delete(engine,
                 engine->server.core->hash(item_get_key(it), it->nkey, 0),
                 item_get_key(it), it->nkey);
    item_unlink_q(engine, it);

    if (it->refcount == 0) {
        item_free(engine, it);
    }
}
/*
 * Drop one reference to an item (never going below zero). Once the count is
 * zero and the item is no longer linked, its memory is freed.
 */
void do_item_release(struct default_engine *engine, hash_item *it) {
    MEMCACHED_ITEM_REMOVE(item_get_key(it), it->nkey, it->nbytes);

    if (it->refcount != 0) {
        it->refcount--;
        DEBUG_REFCNT(it, '-');
    }

    const int unreferenced = (it->refcount == 0);
    if (unreferenced && (it->iflag & ITEM_LINKED) == 0) {
        item_free(engine, it);
    }
}
/*
 * Move a linked item to the head of its LRU queue and refresh its timestamp,
 * but only if its timestamp is older than ITEM_UPDATE_INTERVAL seconds.
 */
void do_item_update(struct default_engine *engine, hash_item *it) {
    rel_time_t now = engine->server.core->get_current_time();
    MEMCACHED_ITEM_UPDATE(item_get_key(it), it->nkey, it->nbytes);

    if (!(it->time < now - ITEM_UPDATE_INTERVAL)) {
        /* Repositioned recently enough; leave the queue alone. */
        return;
    }

    assert((it->iflag & ITEM_SLABBED) == 0);

    if ((it->iflag & ITEM_LINKED) == 0) {
        return;
    }

    item_unlink_q(engine, it);
    it->time = now;
    item_link_q(engine, it);
}
/*
 * Replace one cached item by another: unlink the old item, then link the new
 * one. Returns whatever do_item_link() returns for the new item.
 */
int do_item_replace(struct default_engine *engine,
                    hash_item *it, hash_item *new_it) {
    MEMCACHED_ITEM_REPLACE(item_get_key(it), it->nkey, it->nbytes,
                           item_get_key(new_it), new_it->nkey, new_it->nbytes);
    assert((it->iflag & ITEM_SLABBED) == 0);

    do_item_unlink(engine, it);

    const int link_result = do_item_link(engine, new_it);
    return link_result;
}
/*
 * Dump up to `limit` items of one slab class as "ITEM <key> [<n> b; <t> s]"
 * lines terminated by "END".
 *
 * In the current build the real implementation is compiled out (it sits
 * behind #ifdef FUTURE), so this function ignores its arguments and always
 * returns NULL.
 *
 * NOTE(review): the FUTURE code refers to `engine` and `process_started`,
 * neither of which is a parameter or local of this function, so it would
 * need rework before it could be enabled.
 */
/*@null@*/
static char *do_item_cachedump(const unsigned int slabs_clsid,
const unsigned int limit,
unsigned int *bytes) {
#ifdef FUTURE
unsigned int memlimit = 2 * 1024 * 1024; /* 2MB max response size */
char *buffer;
unsigned int bufcurr;
hash_item *it;
unsigned int len;
unsigned int shown = 0;
char key_temp[KEY_MAX_LENGTH + 1];
char temp[512];
it = engine->items.heads[slabs_clsid];
buffer = malloc((size_t)memlimit);
if (buffer == 0) return NULL;
bufcurr = 0;
/* limit == 0 means "no limit": walk the whole queue from the head. */
while (it != NULL && (limit == 0 || shown < limit)) {
assert(it->nkey <= KEY_MAX_LENGTH);
/* Copy the key since it may not be null-terminated in the struct */
strncpy(key_temp, item_get_key(it), it->nkey);
key_temp[it->nkey] = 0x00; /* terminate */
len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %lu s]\r\n",
key_temp, it->nbytes,
(unsigned long)it->exptime + process_started);
if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */
break;
memcpy(buffer + bufcurr, temp, len);
bufcurr += len;
shown++;
it = it->next;
}
/* Copies the terminating NUL too, but only 5 bytes are counted in *bytes. */
memcpy(buffer + bufcurr, "END\r\n", 6);
bufcurr += 5;
*bytes = bufcurr;
return buffer;
#endif
/* Silence unused-parameter warnings while the implementation is disabled. */
(void)slabs_clsid;
(void)limit;
(void)bytes;
return NULL;
}
/*
 * Emit the per-slab-class item statistics through add_statistics().
 *
 * For every class with a non-empty LRU queue, up to search_items flushed or
 * expired unreferenced items are first removed from the tail, so that the
 * reported "age" comes from a live item where possible. Classes that end up
 * empty are skipped. The stats reported per class are: number, age, evicted,
 * evicted_nonzero, evicted_time, outofmemory, tailrepairs and reclaimed.
 *
 * engine     engine instance
 * add_stats  callback handed on to add_statistics()
 * c          opaque cookie handed on to add_statistics()
 */
static void do_item_stats(struct default_engine *engine,
ADD_STAT add_stats, const void *c) {
int i;
rel_time_t current_time = engine->server.core->get_current_time();
for (i = 0; i < POWER_LARGEST; i++) {
if (engine->items.tails[i] != NULL) {
int search = search_items;
/* Trim dead items off the tail: either flushed or expired. */
while (search > 0 &&
engine->items.tails[i] != NULL &&
((engine->config.oldest_live != 0 && /* item was flushed */
engine->config.oldest_live <= current_time &&
engine->items.tails[i]->time <= engine->config.oldest_live) ||
(engine->items.tails[i]->exptime != 0 && /* or item has expired */
engine->items.tails[i]->exptime < current_time))) {
--search;
if (engine->items.tails[i]->refcount == 0) {
do_item_unlink(engine, engine->items.tails[i]);
} else {
/* A referenced tail item cannot be removed; stop trimming here. */
break;
}
}
if (engine->items.tails[i] == NULL) {
/* We removed all of the items in this slab class */
continue;
}
const char *prefix = "items";
add_statistics(c, add_stats, prefix, i, "number", "%u",
engine->items.sizes[i]);
/* "age" reports the tail item's timestamp as stored, not an elapsed time. */
add_statistics(c, add_stats, prefix, i, "age", "%u",
engine->items.tails[i]->time);
add_statistics(c, add_stats, prefix, i, "evicted",
"%u", engine->items.itemstats[i].evicted);
add_statistics(c, add_stats, prefix, i, "evicted_nonzero",
"%u", engine->items.itemstats[i].evicted_nonzero);
add_statistics(c, add_stats, prefix, i, "evicted_time",
"%u", engine->items.itemstats[i].evicted_time);
add_statistics(c, add_stats, prefix, i, "outofmemory",
"%u", engine->items.itemstats[i].outofmemory);
add_statistics(c, add_stats, prefix, i, "tailrepairs",
"%u", engine->items.itemstats[i].tailrepairs);;
add_statistics(c, add_stats, prefix, i, "reclaimed",
"%u", engine->items.itemstats[i].reclaimed);;
}
}
}
/**
 * Report a histogram of item sizes, rounded up to 32-byte granularity.
 *
 * Every linked item in every slab class is counted in the bucket for its
 * total size; for each non-empty bucket one stat is emitted whose key is the
 * bucket size in bytes and whose value is the number of items. Nothing is
 * reported if the histogram cannot be allocated.
 */
/*@null@*/
static void do_item_stats_sizes(struct default_engine *engine,
                                ADD_STAT add_stats, const void *c) {
    /* max 1MB object, divided into 32 bytes size buckets */
    const int num_buckets = 32768;
    unsigned int *histogram = calloc(num_buckets, sizeof(int));

    if (histogram == NULL) {
        return;
    }

    /* build the histogram */
    for (int cls = 0; cls < POWER_LARGEST; cls++) {
        for (hash_item *iter = engine->items.heads[cls]; iter; iter = iter->next) {
            int ntotal = ITEM_ntotal(engine, iter);
            int bucket = ntotal / 32;
            if ((ntotal % 32) != 0) {
                bucket++;
            }
            if (bucket < num_buckets) {
                histogram[bucket]++;
            }
        }
    }

    /* write the buffer */
    for (int b = 0; b < num_buckets; b++) {
        if (histogram[b] == 0) {
            continue;
        }
        char key[8], val[32];
        int klen = snprintf(key, sizeof(key), "%d", b * 32);
        int vlen = snprintf(val, sizeof(val), "%u", histogram[b]);
        assert(klen < sizeof(key));
        assert(vlen < sizeof(val));
        add_stats(key, klen, val, vlen, c);
    }

    free(histogram);
}
/** wrapper around assoc_find which does the lazy expiration logic */
hash_item *do_item_get(struct default_engine *engine,
const char *key, const size_t nkey) {
rel_time_t current_time = engine->server.core->get_current_time();
hash_item *it = assoc_find(engine, engine->server.core->hash(key,
nkey, 0),
key, nkey);
int was_found = 0;
if (engine->config.verbose > 2) {
EXTENSION_LOGGER_DESCRIPTOR *logger;
logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
if (it == NULL) {
logger->log(EXTENSION_LOG_DEBUG, NULL,
"> NOT FOUND %s", key);
} else {
logger->log(EXTENSION_LOG_DEBUG, NULL,
"> FOUND KEY %s",
(const char*)item_get_key(it));
was_found++;
}
}
if (it != NULL && engine->config.oldest_live != 0 &&
engine->config.oldest_live <= current_time &&
it->time <= engine->config.oldest_live) {
do_item_unlink(engine, it); /* MTSAFE - cache_lock held */
it = NULL;
}
if (it == NULL && was_found) {
EXTENSION_LOGGER_DESCRIPTOR *logger;
logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
logger->log(EXTENSION_LOG_DEBUG, NULL, " -nuked by flush");
was_found--;
}
if (it != NULL && it->exptime != 0 && it->exptime <= current_time) {
do_item_unlink(engine, it); /* MTSAFE - cache_lock held */
it = NULL;
}
if (it == NULL && was_found) {
EXTENSION_LOGGER_DESCRIPTOR *logger;
logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
logger->log(EXTENSION_LOG_DEBUG, NULL, " -nuked by expire");
was_found--;
}
if (it != NULL) {
it->refcount++;
DEBUG_REFCNT(it, '+');
do_item_update(engine, it);
}
return it;
}
/*
 * Stores an item in the cache according to the semantics of one of the set
 * commands. In threaded mode, this is protected by the cache lock.
 *
 * engine     engine instance
 * it         the new item (already allocated and filled in by the caller)
 * cas        out: CAS value of the stored item; written on success
 * operation  which store command is being executed (ADD, REPLACE, APPEND,
 *            PREPEND, CAS, or anything else for a plain set)
 * cookie     passed on to do_item_alloc() for APPEND/PREPEND
 *
 * Returns the state of storage:
 *   ENGINE_SUCCESS      the item was linked into the cache
 *   ENGINE_NOT_STORED   ADD on an existing key, REPLACE/APPEND/PREPEND on a
 *                       missing key, or allocation failure during
 *                       APPEND/PREPEND
 *   ENGINE_KEY_ENOENT   CAS on a missing key
 *   ENGINE_KEY_EEXISTS  CAS value mismatch
 */
static ENGINE_ERROR_CODE do_store_item(struct default_engine *engine,
hash_item *it, uint64_t *cas,
ENGINE_STORE_OPERATION operation,
const void *cookie) {
const char *key = item_get_key(it);
/* do_item_get() takes a reference on old_it; it is released before returning. */
hash_item *old_it = do_item_get(engine, key, it->nkey);
ENGINE_ERROR_CODE stored = ENGINE_NOT_STORED;
hash_item *new_it = NULL;
if (old_it != NULL && operation == OPERATION_ADD) {
/* add only adds a nonexistent item, but promote to head of LRU */
do_item_update(engine, old_it);
} else if (!old_it && (operation == OPERATION_REPLACE
|| operation == OPERATION_APPEND || operation == OPERATION_PREPEND))
{
/* replace only replaces an existing value; don't store */
} else if (operation == OPERATION_CAS) {
/* validate cas operation */
if(old_it == NULL) {
// LRU expired
stored = ENGINE_KEY_ENOENT;
}
else if (item_get_cas(it) == item_get_cas(old_it)) {
// cas validates
// it and old_it may belong to different classes.
// I'm updating the stats for the one that's getting pushed out
do_item_replace(engine, old_it, it);
stored = ENGINE_SUCCESS;
} else {
if (engine->config.verbose > 1) {
EXTENSION_LOGGER_DESCRIPTOR *logger;
logger = (void*)engine->server.extension->get_extension(EXTENSION_LOGGER);
logger->log(EXTENSION_LOG_INFO, NULL,
"CAS: failure: expected %"PRIu64", got %"PRIu64"\n",
item_get_cas(old_it),
item_get_cas(it));
}
stored = ENGINE_KEY_EEXISTS;
}
} else {
/*
* Append - combine new and old record into single one. Here it's
* atomic and thread-safe.
*/
/* old_it is non-NULL for APPEND/PREPEND here: the missing-key case was
* handled by an earlier branch. */
if (operation == OPERATION_APPEND || operation == OPERATION_PREPEND) {
/*
* Validate CAS
*/
if (item_get_cas(it) != 0) {
// CAS must be equal
if (item_get_cas(it) != item_get_cas(old_it)) {
stored = ENGINE_KEY_EEXISTS;
}
}
if (stored == ENGINE_NOT_STORED) {
/* we have it and old_it here - alloc memory to hold both */
new_it = do_item_alloc(engine, key, it->nkey,
old_it->flags,
old_it->exptime,
it->nbytes + old_it->nbytes,
cookie);
if (new_it == NULL) {
/* SERVER_ERROR out of memory */
if (old_it != NULL) {
do_item_release(engine, old_it);
}
return ENGINE_NOT_STORED;
}
/* copy data from it and old_it to new_it */
if (operation == OPERATION_APPEND) {
memcpy(item_get_data(new_it), item_get_data(old_it), old_it->nbytes);
memcpy(item_get_data(new_it) + old_it->nbytes, item_get_data(it), it->nbytes);
} else {
/* OPERATION_PREPEND */
memcpy(item_get_data(new_it), item_get_data(it), it->nbytes);
memcpy(item_get_data(new_it) + it->nbytes, item_get_data(old_it), old_it->nbytes);
}
/* From here on the combined item is the one that gets stored. */
it = new_it;
}
}
if (stored == ENGINE_NOT_STORED) {
if (old_it != NULL) {
do_item_replace(engine, old_it, it);
} else {
do_item_link(engine, it);
}
*cas = item_get_cas(it);
stored = ENGINE_SUCCESS;
}
}
if (old_it != NULL) {
do_item_release(engine, old_it); /* release our reference */
}
if (new_it != NULL) {
/* Drop the allocation reference on the combined item; it stays linked. */
do_item_release(engine, new_it);
}
if (stored == ENGINE_SUCCESS) {
*cas = item_get_cas(it);
}
return stored;
}
/*
 * Adds a delta value to a numeric item (incr/decr).
 *
 * engine  engine instance
 * it      item to adjust; its value must be a decimal number shorter than
 *         79 bytes that safe_strtoull() accepts
 * incr    true to increment value, false to decrement
 * delta   amount to adjust value by; a decrement larger than the current
 *         value clamps the result to 0
 * rcas    out: CAS value of the item holding the new value
 * result  out: the new numeric value
 * cookie  passed on to do_item_alloc() when a replacement item is needed
 *
 * Returns ENGINE_SUCCESS, ENGINE_EINVAL if the current value is too long or
 * not numeric (or formatting fails), or ENGINE_ENOMEM if a replacement item
 * could not be allocated -- in which case the existing item is unlinked.
 */
static ENGINE_ERROR_CODE do_add_delta(struct default_engine *engine,
hash_item *it, const bool incr,
const int64_t delta, uint64_t *rcas,
uint64_t *result, const void *cookie) {
const char *ptr;
uint64_t value;
char buf[80];
int res;
if (it->nbytes >= (sizeof(buf) - 1)) {
return ENGINE_EINVAL;
}
/* The stored value is not NUL-terminated; copy it out and terminate it. */
ptr = item_get_data(it);
memcpy(buf, ptr, it->nbytes);
buf[it->nbytes] = '\0';
if (!safe_strtoull(buf, &value)) {
return ENGINE_EINVAL;
}
if (incr) {
value += delta;
} else {
/* Decrement saturates at zero instead of wrapping around. */
if(delta > value) {
value = 0;
} else {
value -= delta;
}
}
*result = value;
if ((res = snprintf(buf, sizeof(buf), "%" PRIu64, value)) == -1) {
return ENGINE_EINVAL;
}
if (it->refcount == 1 && res <= it->nbytes) {
// we can do inline replacement: we hold the only reference and the new
// text fits; the remainder of the old value is padded with spaces
memcpy(item_get_data(it), buf, res);
memset(item_get_data(it) + res, ' ', it->nbytes - res);
item_set_cas(NULL, NULL, it, get_cas_id());
*rcas = item_get_cas(it);
} else {
/* Otherwise build a new item with the same key/flags/exptime and swap it in. */
hash_item *new_it = do_item_alloc(engine, item_get_key(it),
it->nkey, it->flags,
it->exptime, res,
cookie);
if (new_it == NULL) {
do_item_unlink(engine, it);
return ENGINE_ENOMEM;
}
memcpy(item_get_data(new_it), buf, res);
do_item_replace(engine, it, new_it);
*rcas = item_get_cas(new_it);
do_item_release(engine, new_it); /* release our reference */
}
return ENGINE_SUCCESS;
}
/********************************* ITEM ACCESS *******************************/
/*
 * Allocates a new item. Thread-safe front end for do_item_alloc(): the
 * allocation is performed with the cache lock held.
 */
hash_item *item_alloc(struct default_engine *engine,
                      const void *key, size_t nkey, int flags,
                      rel_time_t exptime, int nbytes, const void *cookie) {
    pthread_mutex_lock(&engine->cache_lock);
    hash_item *allocated = do_item_alloc(engine, key, nkey, flags,
                                         exptime, nbytes, cookie);
    pthread_mutex_unlock(&engine->cache_lock);

    return allocated;
}
/*
 * Looks up an item under the cache lock. Returns the item if it hasn't been
 * marked as expired, lazy-expiring as needed; NULL otherwise.
 */
hash_item *item_get(struct default_engine *engine,
                    const void *key, const size_t nkey) {
    pthread_mutex_lock(&engine->cache_lock);
    hash_item *found = do_item_get(engine, key, nkey);
    pthread_mutex_unlock(&engine->cache_lock);

    return found;
}
/*
 * Drops a reference to an item under the cache lock; the item is returned to
 * the freelist once it is unreferenced and unlinked.
 */
void item_release(struct default_engine *engine, hash_item *item) {
    pthread_mutex_t *lock = &engine->cache_lock;

    pthread_mutex_lock(lock);
    do_item_release(engine, item);
    pthread_mutex_unlock(lock);
}
/*
 * Removes an item from the LRU and the hashtable, holding the cache lock
 * for the duration.
 */
void item_unlink(struct default_engine *engine, hash_item *item) {
    pthread_mutex_t *lock = &engine->cache_lock;

    pthread_mutex_lock(lock);
    do_item_unlink(engine, item);
    pthread_mutex_unlock(lock);
}
/*
 * incr/decr implementation; the caller holds the cache lock.
 *
 * If the key exists, the delta is applied via do_add_delta(). If it does not
 * exist and `create` is set, a new item holding the decimal text of
 * `initial` is stored with ADD semantics; without `create` the result is
 * ENGINE_KEY_ENOENT. On success *result and *cas describe the stored value.
 */
static ENGINE_ERROR_CODE do_arithmetic(struct default_engine *engine,
                                       const void* cookie,
                                       const void* key,
                                       const int nkey,
                                       const bool increment,
                                       const bool create,
                                       const uint64_t delta,
                                       const uint64_t initial,
                                       const rel_time_t exptime,
                                       uint64_t *cas,
                                       uint64_t *result)
{
    ENGINE_ERROR_CODE ret;
    hash_item *item = do_item_get(engine, key, nkey);

    if (item != NULL) {
        /* Existing value: adjust it and drop the lookup reference. */
        ret = do_add_delta(engine, item, increment, delta, cas, result, cookie);
        do_item_release(engine, item);
        return ret;
    }

    if (!create) {
        return ENGINE_KEY_ENOENT;
    }

    /* Missing key: seed it with the textual form of the initial value. */
    char buffer[128];
    int len = snprintf(buffer, sizeof(buffer), "%"PRIu64,
                       (uint64_t)initial);

    item = do_item_alloc(engine, key, nkey, 0, exptime, len, cookie);
    if (item == NULL) {
        return ENGINE_ENOMEM;
    }
    memcpy((void*)item_get_data(item), buffer, len);

    ret = do_store_item(engine, item, cas, OPERATION_ADD, cookie);
    if (ret == ENGINE_SUCCESS) {
        *result = initial;
        *cas = item_get_cas(item);
    }
    do_item_release(engine, item);

    return ret;
}
/*
 * Thread-safe incr/decr entry point: runs do_arithmetic() with the cache
 * lock held and returns its result.
 */
ENGINE_ERROR_CODE arithmetic(struct default_engine *engine,
                             const void* cookie,
                             const void* key,
                             const int nkey,
                             const bool increment,
                             const bool create,
                             const uint64_t delta,
                             const uint64_t initial,
                             const rel_time_t exptime,
                             uint64_t *cas,
                             uint64_t *result)
{
    pthread_mutex_lock(&engine->cache_lock);
    ENGINE_ERROR_CODE status = do_arithmetic(engine, cookie, key, nkey,
                                             increment, create, delta,
                                             initial, exptime, cas, result);
    pthread_mutex_unlock(&engine->cache_lock);

    return status;
}
/*
 * Stores an item in the cache (high level, obeys set/add/replace semantics).
 * Runs do_store_item() with the cache lock held.
 */
ENGINE_ERROR_CODE store_item(struct default_engine *engine,
                             hash_item *item, uint64_t *cas,
                             ENGINE_STORE_OPERATION operation,
                             const void *cookie) {
    pthread_mutex_lock(&engine->cache_lock);
    ENGINE_ERROR_CODE status = do_store_item(engine, item, cas,
                                             operation, cookie);
    pthread_mutex_unlock(&engine->cache_lock);

    return status;
}
/*
 * Look up an item and, if it is live, overwrite its expiry time. The item is
 * returned with the reference taken by do_item_get(); NULL if not found.
 */
static hash_item *do_touch_item(struct default_engine *engine,
                                const void *key,
                                uint16_t nkey,
                                uint32_t exptime)
{
    hash_item *item = do_item_get(engine, key, nkey);

    if (item == NULL) {
        return NULL;
    }
    item->exptime = exptime;
    return item;
}
/*
 * Thread-safe front end for do_touch_item(): updates an item's expiry time
 * under the cache lock and returns the (referenced) item, or NULL.
 */
hash_item *touch_item(struct default_engine *engine,
                      const void *key,
                      uint16_t nkey,
                      uint32_t exptime)
{
    pthread_mutex_lock(&engine->cache_lock);
    hash_item *touched = do_touch_item(engine, key, nkey, exptime);
    pthread_mutex_unlock(&engine->cache_lock);

    return touched;
}
/*
 * Flushes expired items after a flush_all call.
 *
 * Sets config.oldest_live to one second before now (when == 0) or one second
 * before the relative time for `when`, then eagerly unlinks every item at
 * the front of each LRU queue whose timestamp is at or after that point.
 * Runs entirely under the cache lock.
 */
void item_flush_expired(struct default_engine *engine, time_t when) {
    pthread_mutex_lock(&engine->cache_lock);

    if (when == 0) {
        engine->config.oldest_live = engine->server.core->get_current_time() - 1;
    } else {
        engine->config.oldest_live = engine->server.core->realtime(when) - 1;
    }

    if (engine->config.oldest_live != 0) {
        for (int cls = 0; cls < POWER_LARGEST; cls++) {
            /*
             * The LRU is sorted in decreasing time order, and an item's
             * timestamp is never newer than its last access time, so we
             * only need to walk back until we hit an item older than the
             * oldest_live time.
             * The oldest_live checking will auto-expire the remaining items.
             */
            hash_item *iter = engine->items.heads[cls];
            while (iter != NULL && iter->time >= engine->config.oldest_live) {
                /* Grab the successor first: unlinking may free iter. */
                hash_item *following = iter->next;
                if ((iter->iflag & ITEM_SLABBED) == 0) {
                    do_item_unlink(engine, iter);
                }
                iter = following;
            }
            /* Loop ended at the first old item (or the end); next queue. */
        }
    }

    pthread_mutex_unlock(&engine->cache_lock);
}
/*
 * Dumps part of the cache. Calls do_item_cachedump() with the cache lock
 * held and returns whatever it returns.
 */
char *item_cachedump(struct default_engine *engine,
                     unsigned int slabs_clsid,
                     unsigned int limit,
                     unsigned int *bytes) {
    pthread_mutex_lock(&engine->cache_lock);
    char *dump = do_item_cachedump(slabs_clsid, limit, bytes);
    pthread_mutex_unlock(&engine->cache_lock);

    return dump;
}
/*
 * Emit per-slab-class item statistics; runs do_item_stats() under the cache
 * lock.
 */
void item_stats(struct default_engine *engine,
                ADD_STAT add_stat, const void *cookie)
{
    pthread_mutex_t *lock = &engine->cache_lock;

    pthread_mutex_lock(lock);
    do_item_stats(engine, add_stat, cookie);
    pthread_mutex_unlock(lock);
}
/*
 * Emit the item size histogram; runs do_item_stats_sizes() under the cache
 * lock.
 */
void item_stats_sizes(struct default_engine *engine,
                      ADD_STAT add_stat, const void *cookie)
{
    pthread_mutex_t *lock = &engine->cache_lock;

    pthread_mutex_lock(lock);
    do_item_stats_sizes(engine, add_stat, cookie);
    pthread_mutex_unlock(lock);
}
/*
 * Append a cursor item to the tail of slab class ii's LRU queue and count it
 * in the class size. The queue must not be empty: the current tail is
 * dereferenced unconditionally (callers check heads[ii] first).
 */
static void do_item_link_cursor(struct default_engine *engine,
                                hash_item *cursor, int ii)
{
    hash_item *old_tail = engine->items.tails[ii];

    cursor->slabs_clsid = (uint8_t)ii;
    cursor->next = NULL;
    cursor->prev = old_tail;

    old_tail->next = cursor;
    engine->items.tails[ii] = cursor;
    engine->items.sizes[ii]++;
}
/*
 * Callback invoked by do_item_walk_cursor() for each real item the cursor
 * passes. `cookie` is the opaque itemdata pointer given to the walker.
 * Returning anything other than ENGINE_SUCCESS stops the walk.
 */
typedef ENGINE_ERROR_CODE (*ITERFUNC)(struct default_engine *engine,
hash_item *item, void *cookie);
/*
 * Move a cursor item towards the head of its LRU queue, calling itemfunc on
 * every real item it steps over.
 *
 * engine      engine instance; callers hold engine->cache_lock
 * cursor      cursor previously linked with do_item_link_cursor()
 * steplength  maximum number of real items to visit in this call (other
 *             cursors, recognised by nkey == 0 && nbytes == 0, are skipped
 *             and not counted)
 * itemfunc    callback invoked for each visited item
 * itemdata    opaque pointer handed to itemfunc
 * error       out: ENGINE_SUCCESS, or the first non-success itemfunc result
 *
 * Returns true if the cursor is still in the queue with items left in front
 * of it; false if the head was reached (the cursor is then unlinked) or if
 * itemfunc reported an error.
 *
 * item_unlink_q() decrements the class' item count every time the cursor is
 * taken out of the queue. When the cursor is spliced back in by hand the
 * count is incremented again, so the size reported for the class does not
 * shrink by one for every step the cursor takes.
 */
static bool do_item_walk_cursor(struct default_engine *engine,
                                hash_item *cursor,
                                int steplength,
                                ITERFUNC itemfunc,
                                void* itemdata,
                                ENGINE_ERROR_CODE *error)
{
    int ii = 0;
    *error = ENGINE_SUCCESS;

    while (cursor->prev != NULL && ii < steplength) {
        ++ii;
        /* Move cursor */
        hash_item *ptr = cursor->prev;
        item_unlink_q(engine, cursor);

        bool done = false;
        if (ptr == engine->items.heads[cursor->slabs_clsid]) {
            /* ptr is the last item to visit; the cursor stays unlinked. */
            done = true;
            cursor->prev = NULL;
        } else {
            /* Re-insert the cursor immediately in front of ptr. */
            cursor->next = ptr;
            cursor->prev = ptr->prev;
            cursor->prev->next = cursor;
            ptr->prev = cursor;
            /* Undo the decrement done by item_unlink_q(): cursor is linked. */
            engine->items.sizes[cursor->slabs_clsid]++;
        }

        /* Ignore cursors */
        if (ptr->nkey == 0 && ptr->nbytes == 0) {
            --ii;
        } else {
            *error = itemfunc(engine, ptr, itemdata);
            if (*error != ENGINE_SUCCESS) {
                return false;
            }
        }

        if (done) {
            return false;
        }
    }

    return (cursor->prev != NULL);
}
/*
 * Scrubber callback: counts the visit and unlinks the item if it has expired
 * and nobody holds a reference to it. Always returns ENGINE_SUCCESS.
 */
static ENGINE_ERROR_CODE item_scrub(struct default_engine *engine,
                                    hash_item *item,
                                    void *cookie) {
    (void)cookie;
    engine->scrubber.visited++;

    rel_time_t now = engine->server.core->get_current_time();
    const bool unreferenced = (item->refcount == 0);

    if (unreferenced && (item->exptime != 0 && item->exptime < now)) {
        do_item_unlink(engine, item);
        engine->scrubber.cleaned++;
    }

    return ENGINE_SUCCESS;
}
/*
 * Scrub one slab class: repeatedly advance the cursor in batches of 200
 * items, taking the cache lock for each batch only, until the walk reports
 * an error or there is nothing left in front of the cursor.
 */
static void item_scrub_class(struct default_engine *engine,
                             hash_item *cursor) {
    for (;;) {
        ENGINE_ERROR_CODE ret;
        bool more;

        pthread_mutex_lock(&engine->cache_lock);
        more = do_item_walk_cursor(engine, cursor, 200, item_scrub, NULL, &ret);
        pthread_mutex_unlock(&engine->cache_lock);

        if (ret != ENGINE_SUCCESS || !more) {
            break;
        }
    }
}
/*
 * Scrubber thread body. For every slab class that has items, a stack-local
 * cursor is linked at the tail and walked to the head via
 * item_scrub_class(). When all classes are done the stop time is recorded
 * and the running flag cleared under the scrubber lock.
 *
 * arg is the struct default_engine to scrub. Always returns NULL.
 */
static void *item_scubber_main(void *arg)
{
    struct default_engine *engine = arg;
    hash_item cursor = { .refcount = 1 };

    for (int ii = 0; ii < POWER_LARGEST; ++ii) {
        pthread_mutex_lock(&engine->cache_lock);
        const bool has_items = (engine->items.heads[ii] != NULL);
        if (has_items) {
            // add the item at the tail
            do_item_link_cursor(engine, &cursor, ii);
        }
        pthread_mutex_unlock(&engine->cache_lock);

        if (has_items) {
            item_scrub_class(engine, &cursor);
        }
    }

    pthread_mutex_lock(&engine->scrubber.lock);
    engine->scrubber.stopped = time(NULL);
    engine->scrubber.running = false;
    pthread_mutex_unlock(&engine->scrubber.lock);

    return NULL;
}
/*
 * Start a background scrub of the cache unless one is already running.
 *
 * Resets the scrubber counters, marks the scrubber as running and spawns a
 * detached thread executing item_scubber_main(). If the thread cannot be
 * created the running flag is cleared again.
 *
 * Returns true if a new scrubber thread was started, false if one was
 * already running or thread creation failed.
 *
 * The thread attribute object is destroyed once it has been initialised
 * successfully, whether or not the thread was created; POSIX requires
 * pthread_attr_destroy() to release whatever pthread_attr_init() acquired.
 */
bool item_start_scrub(struct default_engine *engine)
{
    bool ret = false;

    pthread_mutex_lock(&engine->scrubber.lock);
    if (!engine->scrubber.running) {
        engine->scrubber.started = time(NULL);
        engine->scrubber.stopped = 0;
        engine->scrubber.visited = 0;
        engine->scrubber.cleaned = 0;
        engine->scrubber.running = true;

        pthread_t t;
        pthread_attr_t attr;

        if (pthread_attr_init(&attr) == 0) {
            if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) == 0 &&
                pthread_create(&t, &attr, item_scubber_main, engine) == 0) {
                ret = true;
            }
            /* The attributes are only needed during pthread_create(). */
            pthread_attr_destroy(&attr);
        }

        if (!ret) {
            engine->scrubber.running = false;
        }
    }
    pthread_mutex_unlock(&engine->scrubber.lock);

    return ret;
}
/*
 * Per-connection state of a TAP walk; allocated by
 * initialize_item_tap_walker() and stored as the cookie's engine-specific
 * data.
 */
struct tap_client {
/* Cursor item linked into the LRU queues to remember the walk position. */
hash_item cursor;
/* Item picked up by the most recent step (set by item_tap_iterfunc), or NULL. */
hash_item *it;
};
/*
 * TAP walker callback: remembers the visited item in the tap_client passed
 * as cookie and takes a reference on it. Always returns ENGINE_SUCCESS.
 */
static ENGINE_ERROR_CODE item_tap_iterfunc(struct default_engine *engine,
                                           hash_item *item,
                                           void *cookie) {
    struct tap_client *client = cookie;

    item->refcount++;
    client->it = item;

    return ENGINE_SUCCESS;
}
/*
 * Produce the next item of a TAP walk; the caller holds the cache lock.
 *
 * The connection's cursor is advanced by one item. When the current slab
 * class is exhausted the cursor is re-linked at the tail of the next
 * non-empty class. All auxiliary outputs are reset (es = NULL, nes = 0,
 * ttl = 0xff, seqno = 0, flags = 0, vbucket = 0).
 *
 * Returns TAP_MUTATION with *itm set to the (referenced) item, or
 * TAP_DISCONNECT when the cookie has no tap state or the walk is finished.
 */
static tap_event_t do_item_tap_walker(struct default_engine *engine,
                                      const void *cookie, item **itm,
                                      void **es, uint16_t *nes, uint8_t *ttl,
                                      uint16_t *flags, uint32_t *seqno,
                                      uint16_t *vbucket)
{
    struct tap_client *client = engine->server.cookie->get_engine_specific(cookie);
    if (client == NULL) {
        return TAP_DISCONNECT;
    }

    *es = NULL;
    *nes = 0;
    *ttl = (uint8_t)-1;
    *seqno = 0;
    *flags = 0;
    *vbucket = 0;
    client->it = NULL;

    ENGINE_ERROR_CODE r;
    while (client->it == NULL) {
        if (do_item_walk_cursor(engine, &client->cursor, 1, item_tap_iterfunc, client, &r)) {
            continue;
        }

        // find next slab class to look at..
        int next_cls = client->cursor.slabs_clsid + 1;
        while (next_cls < POWER_LARGEST && engine->items.heads[next_cls] == NULL) {
            ++next_cls;
        }
        if (next_cls >= POWER_LARGEST) {
            /* No further class holds items: the walk is over. */
            break;
        }
        // add the item at the tail
        do_item_link_cursor(engine, &client->cursor, next_cls);
    }

    *itm = client->it;
    if (*itm == NULL) {
        return TAP_DISCONNECT;
    }
    return TAP_MUTATION;
}
/*
 * Public TAP iterator entry point: casts the engine handle and runs
 * do_item_tap_walker() with the cache lock held.
 */
tap_event_t item_tap_walker(ENGINE_HANDLE* handle,
                            const void *cookie, item **itm,
                            void **es, uint16_t *nes, uint8_t *ttl,
                            uint16_t *flags, uint32_t *seqno,
                            uint16_t *vbucket)
{
    struct default_engine *engine = (struct default_engine*)handle;

    pthread_mutex_lock(&engine->cache_lock);
    tap_event_t event = do_item_tap_walker(engine, cookie, itm, es, nes, ttl,
                                           flags, seqno, vbucket);
    pthread_mutex_unlock(&engine->cache_lock);

    return event;
}
/*
 * Set up the TAP walk state for a connection.
 *
 * Allocates a zeroed tap_client, links its cursor at the tail of the first
 * slab class that has items (taking the cache lock per class checked) and
 * stores the client as the cookie's engine-specific data. The client is
 * stored even when no class had any items to link to.
 *
 * Returns false only if the allocation fails.
 */
bool initialize_item_tap_walker(struct default_engine *engine,
                                const void* cookie)
{
    struct tap_client *client = calloc(1, sizeof(*client));
    if (client == NULL) {
        return false;
    }
    client->cursor.refcount = 1;

    /* Link the cursor! */
    for (int ii = 0; ii < POWER_LARGEST; ++ii) {
        pthread_mutex_lock(&engine->cache_lock);
        const bool found = (engine->items.heads[ii] != NULL);
        if (found) {
            // add the item at the tail
            do_item_link_cursor(engine, &client->cursor, ii);
        }
        pthread_mutex_unlock(&engine->cache_lock);

        if (found) {
            break;
        }
    }

    engine->server.cookie->store_engine_specific(cookie, client);
    return true;
}