item

해당 부분은 실제로 hashtable을 구축해서, item을 어떻게 저장하고 찾을 것인지에 대한 부분이다.

#define POWER_LARGEST 256 /* actual cap is 255 */

#define LARGEST_ID POWER_LARGEST

static item *heads[LARGEST_ID];
static item *tails[LARGEST_ID];

POWER_LARGEST는 slabclass의 최대 개수이다. 즉 item *heads, *tails는 slabclass 마다 존재한다고
생각하면 간단하다.

일단 프로토콜 파싱 부분을 보자. 여기서는 일단 text 프로토콜만 살펴본다.(실제로는 거의 유사하므로…)
protocol 파싱 부분은 memcached.c 에 존재한다. get은 데이터를 찾아오는 명령이다. 실제로 item_get
을 호출해서 item이 존재하는지 체크한다.

/* ntokens is overwritten here... shrug.. */
static inline void process_get_command(conn *c, token_t *tokens, size_t ntokens, bool return_cas) {
char *key;
size_t nkey;
int i = 0;
item *it;
token_t *key_token = &tokens[KEY_TOKEN];
char *suffix;
assert(c != NULL);

do {
while(key_token->length != 0) {

key = key_token->value;
nkey = key_token->length;

if(nkey > KEY_MAX_LENGTH) {
out_string(c, "CLIENT_ERROR bad command line format");
while (i-- > 0) {
item_remove(*(c->ilist + i));
}
return;
}

it = item_get(key, nkey);
if (settings.detail_enabled) {
stats_prefix_record_get(key, nkey, NULL != it);
}
if (it) {
if (i >= c->isize) {
item **new_list = realloc(c->ilist, sizeof(item *) * c->isize * 2);
if (new_list) {
c->isize *= 2;
c->ilist = new_list;
} else {
STATS_LOCK();
stats.malloc_fails++;
STATS_UNLOCK();
item_remove(it);
break;
}
}

/*
* Construct the response. Each hit adds three elements to the
* outgoing data list:
* "VALUE "
* key
* " " + flags + " " + data length + "\r\n" + data (with \r\n)
*/

if (return_cas)
{
MEMCACHED_COMMAND_GET(c->sfd, ITEM_key(it), it->nkey,
it->nbytes, ITEM_get_cas(it));
/* Goofy mid-flight realloc. */
if (i >= c->suffixsize) {
char **new_suffix_list = realloc(c->suffixlist,
sizeof(char *) * c->suffixsize * 2);
if (new_suffix_list) {
c->suffixsize *= 2;
c->suffixlist = new_suffix_list;
} else {
STATS_LOCK();
stats.malloc_fails++;
STATS_UNLOCK();
item_remove(it);
break;
}
}

suffix = cache_alloc(c->thread->suffix_cache);
if (suffix == NULL) {
STATS_LOCK();
stats.malloc_fails++;
STATS_UNLOCK();
out_of_memory(c, "SERVER_ERROR out of memory making CAS suffix");
item_remove(it);
while (i-- > 0) {
item_remove(*(c->ilist + i));
}
return;
}
*(c->suffixlist + i) = suffix;
int suffix_len = snprintf(suffix, SUFFIX_SIZE,
" %llu\r\n",
(unsigned long long)ITEM_get_cas(it));
if (add_iov(c, "VALUE ", 6) != 0 ||
add_iov(c, ITEM_key(it), it->nkey) != 0 ||
add_iov(c, ITEM_suffix(it), it->nsuffix - 2) != 0 ||
add_iov(c, suffix, suffix_len) != 0 ||
add_iov(c, ITEM_data(it), it->nbytes) != 0)
{
item_remove(it);
break;
}
}
else
{
MEMCACHED_COMMAND_GET(c->sfd, ITEM_key(it), it->nkey,
it->nbytes, ITEM_get_cas(it));
if (add_iov(c, "VALUE ", 6) != 0 ||
add_iov(c, ITEM_key(it), it->nkey) != 0 ||
add_iov(c, ITEM_suffix(it), it->nsuffix + it->nbytes) != 0)
{
item_remove(it);
break;
}
}
if (settings.verbose > 1) {
int ii;
fprintf(stderr, ">%d sending key ", c->sfd);
for (ii = 0; ii < it->nkey; ++ii) {
fprintf(stderr, "%c", key[ii]);
}
fprintf(stderr, "\n");
}

/* item_get() has incremented it->refcount for us */
pthread_mutex_lock(&c->thread->stats.mutex);
c->thread->stats.slab_stats[ITEM_clsid(it)].get_hits++;
c->thread->stats.get_cmds++;
pthread_mutex_unlock(&c->thread->stats.mutex);
item_update(it);
*(c->ilist + i) = it;
i++;

} else {
pthread_mutex_lock(&c->thread->stats.mutex);
c->thread->stats.get_misses++;
c->thread->stats.get_cmds++;
pthread_mutex_unlock(&c->thread->stats.mutex);
MEMCACHED_COMMAND_GET(c->sfd, key, nkey, -1, 0);
}

key_token++;
}

/*
* If the command string hasn't been fully processed, get the next set
* of tokens.
*/
if(key_token->value != NULL) {
ntokens = tokenize_command(key_token->value, tokens, MAX_TOKENS);
key_token = tokens;
}

} while(key_token->value != NULL);

c->icurr = c->ilist;
c->ileft = i;
if (return_cas) {
c->suffixcurr = c->suffixlist;
c->suffixleft = i;
}

if (settings.verbose > 1)
fprintf(stderr, ">%d END\n", c->sfd);

/*
If the loop was terminated because of out-of-memory, it is not
reliable to add END\r\n to the buffer, because it might not end
in \r\n. So we send SERVER_ERROR instead.
*/
if (key_token->value != NULL || add_iov(c, "END\r\n", 5) != 0
|| (IS_UDP(c->transport) && build_udp_headers(c) != 0)) {
out_of_memory(c, "SERVER_ERROR out of memory writing get response");
}
else {
conn_set_state(c, conn_mwrite);
c->msgcurr = 0;
}
}

item_get 은 다음과 같이 구현되어 있다. key 를 hash해서 hv 값을 만든다.
이때 hash 함수는 jenkins hash 와 murmur3가 있는데 기본으로 jenkins hash가 적용된다.
(설정가능)

/*
* Returns an item if it hasn't been marked as expired,
* lazy-expiring as needed.
*/
item *item_get(const char *key, const size_t nkey) {
item *it;
uint32_t hv;
hv = hash(key, nkey);
item_lock(hv);
it = do_item_get(key, nkey, hv);
item_unlock(hv);
return it;
}

먼저 item_lock 코드를 살펴보자.

#define hashsize(n) ((unsigned long int)1<<(n))
#define hashmask(n) (hashsize(n)-1)

void item_lock(uint32_t hv) {
mutex_lock(&item_locks[hv & hashmask(item_lock_hashpower)]);
}

먼저 해당 item 이 들어있는 영역의 Lock을 건다. 여기서 이 item_locks는
memcached_thread_init() 함수의 thread 개수에 따라서 바뀌게 된다. thread
개수가 3개 이하면 1024개의 item_locks 가 생기고 5개 이상이면 8192개의
item_locks 가 생기게 된다.

이 의미는 해당 영역만 lock 이 걸리므로 다른 hash 결과와 연관된 item의 경우는
다른 thread에서 여전히 접근이 가능하다는 것이다.

/*
* Initializes the thread subsystem, creating various worker threads.
*
* nthreads Number of worker event handler threads to spawn
* main_base Event base for main thread
*/
void memcached_thread_init(int nthreads, struct event_base *main_base) {
int i;
int power;

for (i = 0; i < POWER_LARGEST; i++) {
pthread_mutex_init(&lru_locks[i], NULL);
}
pthread_mutex_init(&worker_hang_lock, NULL);

pthread_mutex_init(&init_lock, NULL);
pthread_cond_init(&init_cond, NULL);

pthread_mutex_init(&cqi_freelist_lock, NULL);
cqi_freelist = NULL;

/* Want a wide lock table, but don't waste memory */
if (nthreads < 3) {
power = 10;
} else if (nthreads < 4) {
power = 11;
} else if (nthreads < 5) {
power = 12;
} else {
/* 8192 buckets, and central locks don't scale much past 5 threads */
power = 13;
}

if (power >= hashpower) {
fprintf(stderr, "Hash table power size (%d) cannot be equal to or less than item lock table (%d)\n", hashpower, power);
fprintf(stderr, "Item lock table grows with `-t N` (worker threadcount)\n");
fprintf(stderr, "Hash table grows with `-o hashpower=N` \n");
exit(1);
}

item_lock_count = hashsize(power);
item_lock_hashpower = power;

item_locks = calloc(item_lock_count, sizeof(pthread_mutex_t));
if (! item_locks) {
perror("Can't allocate item locks");
exit(1);
}
for (i = 0; i < item_lock_count; i++) {
pthread_mutex_init(&item_locks[i], NULL);
}

threads = calloc(nthreads, sizeof(LIBEVENT_THREAD));
if (! threads) {
perror("Can't allocate thread descriptors");
exit(1);
}

dispatcher_thread.base = main_base;
dispatcher_thread.thread_id = pthread_self();

for (i = 0; i < nthreads; i++) {
int fds[2];
if (pipe(fds)) {
perror("Can't create notify pipe");
exit(1);
}

threads[i].notify_receive_fd = fds[0];
threads[i].notify_send_fd = fds[1];

setup_thread(&threads[i]);
/* Reserve three fds for the libevent base, and two for the pipe */
stats.reserved_fds += 5;
}

/* Create threads after we've done all the libevent setup. */
for (i = 0; i < nthreads; i++) {
create_worker(worker_libevent, &threads[i]);
}

/* Wait for all the threads to set themselves up before returning. */
pthread_mutex_lock(&init_lock);
wait_for_thread_registration(nthreads);
pthread_mutex_unlock(&init_lock);
}

이제 다시 do_item_get() 함수를 살펴보자.

/** wrapper around assoc_find which does the lazy expiration logic */
item *do_item_get(const char *key, const size_t nkey, const uint32_t hv) {
item *it = assoc_find(key, nkey, hv);
if (it != NULL) {
refcount_incr(&it->refcount);
/* Optimization for slab reassignment. prevents popular items from
* jamming in busy wait. Can only do this here to satisfy lock order
* of item_lock, slabs_lock. */
/* This was made unsafe by removal of the cache_lock:
* slab_rebalance_signal and slab_rebal.* are modified in a separate
* thread under slabs_lock. If slab_rebalance_signal = 1, slab_start =
* NULL (0), but slab_end is still equal to some value, this would end
* up unlinking every item fetched.
* This is either an acceptable loss, or if slab_rebalance_signal is
* true, slab_start/slab_end should be put behind the slabs_lock.
* Which would cause a huge potential slowdown.
* Could also use a specific lock for slab_rebal.* and
* slab_rebalance_signal (shorter lock?)
*/
/*if (slab_rebalance_signal &&
((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
}*/
}
int was_found = 0;

if (settings.verbose > 2) {
int ii;
if (it == NULL) {
fprintf(stderr, "> NOT FOUND ");
} else {
fprintf(stderr, "> FOUND KEY ");
was_found++;
}
for (ii = 0; ii < nkey; ++ii) {
fprintf(stderr, "%c", key[ii]);
}
}

if (it != NULL) {
if (item_is_flushed(it)) {
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by flush");
}
} else if (it->exptime != 0 && it->exptime <= current_time) {
do_item_unlink(it, hv);
do_item_remove(it);
it = NULL;
if (was_found) {
fprintf(stderr, " -nuked by expire");
}
} else {
it->it_flags |= ITEM_FETCHED|ITEM_ACTIVE;
DEBUG_REFCNT(it, '+');
}
}

if (settings.verbose > 2)
fprintf(stderr, "\n");

return it;
}

실제로 item은 다시 assoc_find 함수를 통해서 hashtable에서 item을 가져오게 된다.
item이 있다면, flush 되었는지, expire 가 되었는지 확인해서 이 경우에는 item을 지우고
(정확히는 item 객체를 풀에 반납하고) NULL을 반납하게 된다.

memcached의 경우는 내부적으로 flush_all 이라는 명령이 오면.. 현재 시간을 저장하고, 그것보다
이전에 생성된 데이터의 경우에 (item->time 에 생성시간이 저장됨.) 지워버림. 이것을 flush 라고 함

item *assoc_find(const char *key, const size_t nkey, const uint32_t hv) {
item *it;
unsigned int oldbucket;

if (expanding &&
(oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
{
it = old_hashtable[oldbucket];
} else {
it = primary_hashtable[hv & hashmask(hashpower)];
}

item *ret = NULL;
int depth = 0;
while (it) {
if ((nkey == it->nkey) && (memcmp(key, ITEM_key(it), nkey) == 0)) {
ret = it;
break;
}
it = it->h_next;
++depth;
}
MEMCACHED_ASSOC_FIND(key, nkey, depth);
return ret;
}

memcached 에서 primary_hashtable 와 old_hashtable 가 있는데 old_hashtable는
hashtable의 expanding 중에만 존재하는 hashtable이다. 정확히는 확장 전까지의
primary_hashtable이 old_hashtable이 된다. 확장 전의 bucket 이면 old_hashtable에서
데이터를 찾고, 이미 확장된 bucket 은 primary_hashtable 에서 찾는다.

memcached 의 hashtable은 일반적으로 해당 hashtable 안에 linked list 가 들어가 있는
구조이다. 즉 hv 값으로 hashtable을 찾고 거기서는 linked list의 선형 탐색을 하게 되는
것이다. java8에서의 hashmap은 안에 tree 형태로 데이터가 들어가서 많은 데이터를 가지고
있을 경우, 더 빠른 탐색 시간을 보장하는데… memcached 와 redis 등은 hashtable extending
으로 이런 이슈를 해결한다.

이제 hashtable에 저장하는 코드를 살펴보자. process_update_command() 에서 시작하게 된다.

static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, int comm, bool handle_cas) {
char *key;
size_t nkey;
unsigned int flags;
int32_t exptime_int = 0;
time_t exptime;
int vlen;
uint64_t req_cas_id=0;
item *it;

assert(c != NULL);

set_noreply_maybe(c, tokens, ntokens);

if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
out_string(c, "CLIENT_ERROR bad command line format");
return;
}

key = tokens[KEY_TOKEN].value;
nkey = tokens[KEY_TOKEN].length;

if (! (safe_strtoul(tokens[2].value, (uint32_t *)&flags)
&& safe_strtol(tokens[3].value, &exptime_int)
&& safe_strtol(tokens[4].value, (int32_t *)&vlen))) {
out_string(c, "CLIENT_ERROR bad command line format");
return;
}

/* Ubuntu 8.04 breaks when I pass exptime to safe_strtol */
exptime = exptime_int;

/* Negative exptimes can underflow and end up immortal. realtime() will
immediately expire values that are greater than REALTIME_MAXDELTA, but less
than process_started, so lets aim for that. */
if (exptime < 0)
exptime = REALTIME_MAXDELTA + 1;

// does cas value exist?
if (handle_cas) {
if (!safe_strtoull(tokens[5].value, &req_cas_id)) {
out_string(c, "CLIENT_ERROR bad command line format");
return;
}
}

vlen += 2;
if (vlen < 0 || vlen - 2 < 0) {
out_string(c, "CLIENT_ERROR bad command line format");
return;
}

if (settings.detail_enabled) {
stats_prefix_record_set(key, nkey);
}

it = item_alloc(key, nkey, flags, realtime(exptime), vlen);

if (it == 0) {
if (! item_size_ok(nkey, flags, vlen))
out_string(c, "SERVER_ERROR object too large for cache");
else
out_of_memory(c, "SERVER_ERROR out of memory storing object");
/* swallow the data line */
c->write_and_go = conn_swallow;
c->sbytes = vlen;

/* Avoid stale data persisting in cache because we failed alloc.
* Unacceptable for SET. Anywhere else too? */
if (comm == NREAD_SET) {
it = item_get(key, nkey);
if (it) {
item_unlink(it);
item_remove(it);
}
}

return;
}
ITEM_set_cas(it, req_cas_id);

c->item = it;
c->ritem = ITEM_data(it);
c->rlbytes = it->nbytes;
c->cmd = comm;
conn_set_state(c, conn_nread);
}

일단 할당을 위해서 item_alloc() 을 호출하게 됩니다. item_alloc은 do_item_alloc 에서
내부적으로 lock을 사용하므로 따로 lock을 사용하지 않습니다.

item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
item *it;
/* do_item_alloc handles its own locks */
it = do_item_alloc(key, nkey, flags, exptime, nbytes, 0);
return it;
}

do_item_alloc 에서는 먼저 item 이 사용해야할 size를 구합니다. slabclass가 size 별로
구분되어 있는 걸 기억한다면, 아 이제 slabclass 에서 item 을 가져오겠다는 걸 예상할 수 있습니다.
item_make_header() 는 item이 차지할 size를 알려주는 함수입니다.

slab_alloc을 호출해서 사용할 item 을 가져옵니다.(이 때 실제로 slab에서 할당됩니다.)
여기서는 실제 item 구조체에 data, flags, key등을 셋팅하고 item 구조체를 넘겨줍니다.

item *do_item_alloc(char *key, const size_t nkey, const int flags,
const rel_time_t exptime, const int nbytes,
const uint32_t cur_hv) {
int i;
uint8_t nsuffix;
item *it = NULL;
char suffix[40];
unsigned int total_chunks;
size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
if (settings.use_cas) {
ntotal += sizeof(uint64_t);
}

unsigned int id = slabs_clsid(ntotal);
if (id == 0)
return 0;

/* If no memory is available, attempt a direct LRU juggle/eviction */
/* This is a race in order to simplify lru_pull_tail; in cases where
* locked items are on the tail, you want them to fall out and cause
* occasional OOM's, rather than internally work around them.
* This also gives one fewer code path for slab alloc/free
*/
for (i = 0; i < 5; i++) {
/* Try to reclaim memory first */
if (!settings.lru_maintainer_thread) {
lru_pull_tail(id, COLD_LRU, 0, false, cur_hv);
}
it = slabs_alloc(ntotal, id, &total_chunks, 0);
if (settings.expirezero_does_not_evict)
total_chunks -= noexp_lru_size(id);
if (it == NULL) {
if (settings.lru_maintainer_thread) {
lru_pull_tail(id, HOT_LRU, total_chunks, false, cur_hv);
lru_pull_tail(id, WARM_LRU, total_chunks, false, cur_hv);
lru_pull_tail(id, COLD_LRU, total_chunks, true, cur_hv);
} else {
lru_pull_tail(id, COLD_LRU, 0, true, cur_hv);
}
} else {
break;
}
}

if (i > 0) {
pthread_mutex_lock(&lru_locks[id]);
itemstats[id].direct_reclaims += i;
pthread_mutex_unlock(&lru_locks[id]);
}

if (it == NULL) {
pthread_mutex_lock(&lru_locks[id]);
itemstats[id].outofmemory++;
pthread_mutex_unlock(&lru_locks[id]);
return NULL;
}

assert(it->slabs_clsid == 0);
//assert(it != heads[id]);

/* Refcount is seeded to 1 by slabs_alloc() */
it->next = it->prev = it->h_next = 0;
/* Items are initially loaded into the HOT_LRU. This is '0' but I want at
* least a note here. Compiler (hopefully?) optimizes this out.
*/
if (settings.lru_maintainer_thread) {
if (exptime == 0 && settings.expirezero_does_not_evict) {
id |= NOEXP_LRU;
} else {
id |= HOT_LRU;
}
} else {
/* There is only COLD in compat-mode */
id |= COLD_LRU;
}
it->slabs_clsid = id;

DEBUG_REFCNT(it, '*');
it->it_flags = settings.use_cas ? ITEM_CAS : 0;
it->nkey = nkey;
it->nbytes = nbytes;
memcpy(ITEM_key(it), key, nkey);
it->exptime = exptime;
memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
it->nsuffix = nsuffix;
return it;
}

그런데 뭔가 이상한 걸 느끼지 못했나요? get 은 실제 hashtable에서 데이터를 가져왔는데…
왜 set 은 실제 데이터를 넣는 부분이 없을까요? 실제 memcached 코드에서는 이 부분이 나눠져
있습니다. 먼저 memcached text 프로토콜에서 set 프로토콜을 한번 살펴봅시다.
먼저 set 명령은 2 line으로 구분되고 첫번째 라인은 다음과 같이 구성됩니다.

SET key [flags] [exptime] length [noreply]

중간에 length 가 보이지요? 그럼 그 다음중에는 무엇이 와야 할까요? 네… 정답입니다.
위에서 length 지정한 크기 만큼의 실제 데이터죠. 그래서 process_update_command의 마지막에서
다음과 같이 conn_set_state 함수를 통해서 현재 connection의 상태를 conn_nread로 만들어둡니다.

c->item = it;
c->ritem = ITEM_data(it);
c->rlbytes = it->nbytes;
c->cmd = comm;
conn_set_state(c, conn_nread);

그러면 실제 event_handler 에서 conn_nread 상태에서는 다음과 같이 호출되게 됩니다.

case conn_nread:
if (c->rlbytes == 0) {
complete_nread(c);
break;
}

/* Check if rbytes < 0, to prevent crash */
if (c->rlbytes < 0) {
if (settings.verbose) {
fprintf(stderr, "Invalid rlbytes to read: len %d\n", c->rlbytes);
}
conn_set_state(c, conn_closing);
break;
}

/* first check if we have leftovers in the conn_read buffer */
if (c->rbytes > 0) {
int tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes;
if (c->ritem != c->rcurr) {
memmove(c->ritem, c->rcurr, tocopy);
}
c->ritem += tocopy;
c->rlbytes -= tocopy;
c->rcurr += tocopy;
c->rbytes -= tocopy;
if (c->rlbytes == 0) {
break;
}
}

/* now try reading from the socket */
res = read(c->sfd, c->ritem, c->rlbytes);
if (res > 0) {
pthread_mutex_lock(&c->thread->stats.mutex);
c->thread->stats.bytes_read += res;
pthread_mutex_unlock(&c->thread->stats.mutex);
if (c->rcurr == c->ritem) {
c->rcurr += res;
}
c->ritem += res;
c->rlbytes -= res;
break;
}
if (res == 0) { /* end of stream */
conn_set_state(c, conn_closing);
break;
}
if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
if (!update_event(c, EV_READ | EV_PERSIST)) {
if (settings.verbose > 0)
fprintf(stderr, "Couldn't update event\n");
conn_set_state(c, conn_closing);
break;
}
stop = true;
break;
}
/* otherwise we have a real error, on which we close the connection */
if (settings.verbose > 0) {
fprintf(stderr, "Failed to read, and not due to blocking:\n"
"errno: %d %s \n"
"rcurr=%lx ritem=%lx rbuf=%lx rlbytes=%d rsize=%d\n",
errno, strerror(errno),
(long)c->rcurr, (long)c->ritem, (long)c->rbuf,
(int)c->rlbytes, (int)c->rsize);
}
conn_set_state(c, conn_closing);
break;

conn_nread 에서는 c->rlbytes 값이 0이 될때까지 계속 이벤트가 발생할 때 마다
read 함수를 통해서 데이터를 읽어들이게 됩니다. 그리고 c->rlbytes == 0이면
필요한 만큼의 데이터를 읽었다는 의미이므로 complete_nread()를 호출합니다.

static void complete_nread(conn *c) {
assert(c != NULL);
assert(c->protocol == ascii_prot
|| c->protocol == binary_prot);

if (c->protocol == ascii_prot) {
complete_nread_ascii(c);
} else if (c->protocol == binary_prot) {
complete_nread_binary(c);
}
}

이제 complete_nread_ascii 함수에서 store_item()을 호출해서 최종 마무리 작업을 하게 됩니다.

/*
* we get here after reading the value in set/add/replace commands. The command
* has been stored in c->cmd, and the item is ready in c->item.
*/
static void complete_nread_ascii(conn *c) {
assert(c != NULL);

item *it = c->item;
int comm = c->cmd;
enum store_item_type ret;

pthread_mutex_lock(&c->thread->stats.mutex);
c->thread->stats.slab_stats[ITEM_clsid(it)].set_cmds++;
pthread_mutex_unlock(&c->thread->stats.mutex);

if (strncmp(ITEM_data(it) + it->nbytes - 2, "\r\n", 2) != 0) {
out_string(c, "CLIENT_ERROR bad data chunk");
} else {
ret = store_item(it, comm, c);

switch (ret) {
case STORED:
out_string(c, "STORED");
break;
case EXISTS:
out_string(c, "EXISTS");
break;
case NOT_FOUND:
out_string(c, "NOT_FOUND");
break;
case NOT_STORED:
out_string(c, "NOT_STORED");
break;
default:
out_string(c, "SERVER_ERROR Unhandled storage type.");
}
}

item_remove(c->item); /* release the c->item reference */
c->item = 0;
}

store_item 은 다시 item_lock을 걸고 do_store_item 을 호출합니다.

/*
* Stores an item in the cache (high level, obeys set/add/replace semantics)
*/
enum store_item_type store_item(item *item, int comm, conn* c) {
enum store_item_type ret;
uint32_t hv;

hv = hash(ITEM_key(item), item->nkey);
item_lock(hv);
ret = do_store_item(item, comm, c, hv);
item_unlock(hv);
return ret;
}

do_store_item 은 좀 더 복잡한 작업을 합니다. 최초의 함수명을 기억하시나요?
process_update_command 입니다. 즉 없던 item이 추가되는 경우만 아니라, 기존의 item 이
바뀌는 경우도 당연히 발생합니다.

그래서 먼저 do_item_get() 을 호출해서 item을 찾습니다. 일단은 기존에 item 이 없던
새로운 item이 추가되는 경우만 살펴보도록 하겠습니다.

/*
* Stores an item in the cache according to the semantics of one of the set
* commands. In threaded mode, this is protected by the cache lock.
*
* Returns the state of storage.
*/
enum store_item_type do_store_item(item *it, int comm, conn *c, const uint32_t hv) {
char *key = ITEM_key(it);
item *old_it = do_item_get(key, it->nkey, hv);
enum store_item_type stored = NOT_STORED;

item *new_it = NULL;
int flags;

if (old_it != NULL && comm == NREAD_ADD) {
/* add only adds a nonexistent item, but promote to head of LRU */
do_item_update(old_it);
} else if (!old_it && (comm == NREAD_REPLACE
|| comm == NREAD_APPEND || comm == NREAD_PREPEND))
{
/* replace only replaces an existing value; don't store */
} else if (comm == NREAD_CAS) {
/* validate cas operation */
if(old_it == NULL) {
// LRU expired
stored = NOT_FOUND;
pthread_mutex_lock(&c->thread->stats.mutex);
c->thread->stats.cas_misses++;
pthread_mutex_unlock(&c->thread->stats.mutex);
}
else if (ITEM_get_cas(it) == ITEM_get_cas(old_it)) {
// cas validates
// it and old_it may belong to different classes.
// I'm updating the stats for the one that's getting pushed out
pthread_mutex_lock(&c->thread->stats.mutex);
c->thread->stats.slab_stats[ITEM_clsid(old_it)].cas_hits++;
pthread_mutex_unlock(&c->thread->stats.mutex);

item_replace(old_it, it, hv);
stored = STORED;
} else {
pthread_mutex_lock(&c->thread->stats.mutex);
c->thread->stats.slab_stats[ITEM_clsid(old_it)].cas_badval++;
pthread_mutex_unlock(&c->thread->stats.mutex);

if(settings.verbose > 1) {
fprintf(stderr, "CAS: failure: expected %llu, got %llu\n",
(unsigned long long)ITEM_get_cas(old_it),
(unsigned long long)ITEM_get_cas(it));
}
stored = EXISTS;
}
} else {
/*
* Append - combine new and old record into single one. Here it's
* atomic and thread-safe.
*/
if (comm == NREAD_APPEND || comm == NREAD_PREPEND) {
/*
* Validate CAS
*/
if (ITEM_get_cas(it) != 0) {
// CAS much be equal
if (ITEM_get_cas(it) != ITEM_get_cas(old_it)) {
stored = EXISTS;
}
}

if (stored == NOT_STORED) {
/* we have it and old_it here - alloc memory to hold both */
/* flags was already lost - so recover them from ITEM_suffix(it) */

flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);

new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */, hv);

if (new_it == NULL) {
/* SERVER_ERROR out of memory */
if (old_it != NULL)
do_item_remove(old_it);

return NOT_STORED;
}

/* copy data from it and old_it to new_it */

if (comm == NREAD_APPEND) {
memcpy(ITEM_data(new_it), ITEM_data(old_it), old_it->nbytes);
memcpy(ITEM_data(new_it) + old_it->nbytes - 2 /* CRLF */, ITEM_data(it), it->nbytes);
} else {
/* NREAD_PREPEND */
memcpy(ITEM_data(new_it), ITEM_data(it), it->nbytes);
memcpy(ITEM_data(new_it) + it->nbytes - 2 /* CRLF */, ITEM_data(old_it), old_it->nbytes);
}

it = new_it;
}
}

if (stored == NOT_STORED) {
if (old_it != NULL)
item_replace(old_it, it, hv);
else
do_item_link(it, hv);

c->cas = ITEM_get_cas(it);

stored = STORED;
}
}

if (old_it != NULL)
do_item_remove(old_it); /* release our reference */
if (new_it != NULL)
do_item_remove(new_it);

if (stored == STORED) {
c->cas = ITEM_get_cas(it);
}

return stored;
}

기존 item 이 있다면(update), item_replace 이 호출되고, 새로운 item(set) 이라면
do_item_link 가 호출되게 됩니다.

int do_item_link(item *it, const uint32_t hv) {
MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
it->it_flags |= ITEM_LINKED;
it->time = current_time;

STATS_LOCK();
stats.curr_bytes += ITEM_ntotal(it);
stats.curr_items += 1;
stats.total_items += 1;
STATS_UNLOCK();

/* Allocate a new CAS ID on link. */
ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
assoc_insert(it, hv);
item_link_q(it);
refcount_incr(&it->refcount);

return 1;
}

여기서 최종적으로 assoc_insert 를 호출하면서 hashtable에 데이터가 저장됩니다.
그리고 아까 언급했던 item->time 에 현재 시간이 들어갑니다. assoc_find 와 거의 동일합니다.
hv 값을 통해서 hashtable의 index를 구하고 거기 첫 값을 it->h_next 로 설정하고,
해당 table의 첫번째 값을 새로운 item으로 설정합니다. 즉, hashtable의 head로 새로운
item이 들어가는 거죠. 이때 hash_items 가 (hashsize(hashpower) * 3) / 2 보다 커지면
두배로 확장하게 됩니다.

/* Note: this isn't an assoc_update. The key must not already exist to call this */
int assoc_insert(item *it, const uint32_t hv) {
unsigned int oldbucket;

// assert(assoc_find(ITEM_key(it), it->nkey) == 0); /* shouldn't have duplicately named things defined */

if (expanding &&
(oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
{
it->h_next = old_hashtable[oldbucket];
old_hashtable[oldbucket] = it;
} else {
it->h_next = primary_hashtable[hv & hashmask(hashpower)];
primary_hashtable[hv & hashmask(hashpower)] = it;
}

pthread_mutex_lock(&hash_items_counter_lock);
hash_items++;
if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
assoc_start_expand();
}
pthread_mutex_unlock(&hash_items_counter_lock);

MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items);
return 1;
}

[입 개발] memcached 소스분석 #2

item

Trending Articles

Practice Sheet of Right form of verbs for HSC Students

Download: FK ft Shenky – Nakuyewa ”Prod by: Shenky”

How to win at Markstrat (Markstrat Tips and Tricks) – Vodites

Ominde Commission Report and Recommendations – Ominde Report of 1964

Bureau of Internal Revenue: Regional Offices (Directory)

GO 53 on Enhancement of Ex-gratia upto 5 Lakhs Toddy Tappers in Telangana

Cakewalk CA-2A Leveling Amplifier v2.0.1.97 WiN, v2.0.1.96 OSX Incl Keygen

Mp3 Download: Mdu - Kunjenjenjena

How the kill the job , when DTP request running for long hours.

Microsoft Intune から展開しているアプリのアップデートについて

18-year-old girl was beaten for half an hour by two Northampton men in 'an...

Car crash in Dunton Bassett leaves driver in critical condition

Macky 2, Two Others In Road Accident

Application log 00000000000000089514: Could not convert queue DLVST90CLNT

Detroit mafia: D’Anna Brothers agree to plea deal

Delivery block field greyed out using VA02

Muloraki Au

【個人撮影】スマホのプライベート映像♪「中に出さないで///」カラオケ屋での生ハメ撮りが流出ｗ【リベンジポルノ】＠PornHub

BREAKING NEWS: Diamond Platnumz Is Reported Dead After Ghastly Car Accident

FIAT 500 B0111 B0112