| 1 | /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
|---|
| 2 | /* $Id$ */ |
|---|
| 3 | #include "memcached.h" |
|---|
| 4 | #include <sys/stat.h> |
|---|
| 5 | #include <sys/socket.h> |
|---|
| 6 | #include <sys/signal.h> |
|---|
| 7 | #include <sys/resource.h> |
|---|
| 8 | #include <fcntl.h> |
|---|
| 9 | #include <netinet/in.h> |
|---|
| 10 | #include <errno.h> |
|---|
| 11 | #include <stdlib.h> |
|---|
| 12 | #include <stdio.h> |
|---|
| 13 | #include <string.h> |
|---|
| 14 | #include <time.h> |
|---|
| 15 | #include <assert.h> |
|---|
| 16 | |
|---|
| 17 | /* Forward Declarations */ |
|---|
| 18 | static void item_link_q(item *it); |
|---|
| 19 | static void item_unlink_q(item *it); |
|---|
| 20 | |
|---|
| 21 | /* |
|---|
| 22 | * We only reposition items in the LRU queue if they haven't been repositioned |
|---|
| 23 | * in this many seconds. That saves us from churning on frequently-accessed |
|---|
| 24 | * items. |
|---|
| 25 | */ |
|---|
| 26 | #define ITEM_UPDATE_INTERVAL 60 |
|---|
| 27 | |
|---|
| 28 | #define LARGEST_ID 255 |
|---|
| 29 | static item *heads[LARGEST_ID]; |
|---|
| 30 | static item *tails[LARGEST_ID]; |
|---|
| 31 | static unsigned int sizes[LARGEST_ID]; |
|---|
| 32 | |
|---|
| 33 | void item_init(void) { |
|---|
| 34 | int i; |
|---|
| 35 | for(i = 0; i < LARGEST_ID; i++) { |
|---|
| 36 | heads[i] = NULL; |
|---|
| 37 | tails[i] = NULL; |
|---|
| 38 | sizes[i] = 0; |
|---|
| 39 | } |
|---|
| 40 | } |
|---|
| 41 | |
|---|
| 42 | /* Enable this for reference-count debugging. */ |
|---|
| 43 | #if 0 |
|---|
| 44 | # define DEBUG_REFCNT(it,op) \ |
|---|
| 45 | fprintf(stderr, "item %x refcnt(%c) %d %c%c%c\n", \ |
|---|
| 46 | it, op, it->refcount, \ |
|---|
| 47 | (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \ |
|---|
| 48 | (it->it_flags & ITEM_SLABBED) ? 'S' : ' ', \ |
|---|
| 49 | (it->it_flags & ITEM_DELETED) ? 'D' : ' ') |
|---|
| 50 | #else |
|---|
| 51 | # define DEBUG_REFCNT(it,op) while(0) |
|---|
| 52 | #endif |
|---|
| 53 | |
|---|
| 54 | /** |
|---|
| 55 | * Generates the variable-sized part of the header for an object. |
|---|
| 56 | * |
|---|
| 57 | * key - The key |
|---|
| 58 | * nkey - The length of the key |
|---|
| 59 | * flags - key flags |
|---|
| 60 | * nbytes - Number of bytes to hold value and addition CRLF terminator |
|---|
| 61 | * suffix - Buffer for the "VALUE" line suffix (flags, size). |
|---|
| 62 | * nsuffix - The length of the suffix is stored here. |
|---|
| 63 | * |
|---|
| 64 | * Returns the total size of the header. |
|---|
| 65 | */ |
|---|
| 66 | static size_t item_make_header(const uint8_t nkey, const int flags, const int nbytes, |
|---|
| 67 | char *suffix, uint8_t *nsuffix) { |
|---|
| 68 | /* suffix is defined at 40 chars elsewhere.. */ |
|---|
| 69 | *nsuffix = (uint8_t) snprintf(suffix, 40, " %d %d\r\n", flags, nbytes - 2); |
|---|
| 70 | return sizeof(item) + nkey + *nsuffix + nbytes; |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | /*@null@*/ |
|---|
| 74 | item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) { |
|---|
| 75 | uint8_t nsuffix; |
|---|
| 76 | item *it; |
|---|
| 77 | char suffix[40]; |
|---|
| 78 | size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix); |
|---|
| 79 | |
|---|
| 80 | unsigned int id = slabs_clsid(ntotal); |
|---|
| 81 | if (id == 0) |
|---|
| 82 | return 0; |
|---|
| 83 | |
|---|
| 84 | it = slabs_alloc(ntotal); |
|---|
| 85 | if (it == 0) { |
|---|
| 86 | int tries = 50; |
|---|
| 87 | item *search; |
|---|
| 88 | |
|---|
| 89 | /* If requested to not push old items out of cache when memory runs out, |
|---|
| 90 | * we're out of luck at this point... |
|---|
| 91 | */ |
|---|
| 92 | |
|---|
| 93 | if (settings.evict_to_free == 0) return NULL; |
|---|
| 94 | |
|---|
| 95 | /* |
|---|
| 96 | * try to get one off the right LRU |
|---|
| 97 | * don't necessariuly unlink the tail because it may be locked: refcount>0 |
|---|
| 98 | * search up from tail an item with refcount==0 and unlink it; give up after 50 |
|---|
| 99 | * tries |
|---|
| 100 | */ |
|---|
| 101 | |
|---|
| 102 | if (id > LARGEST_ID) return NULL; |
|---|
| 103 | if (tails[id] == 0) return NULL; |
|---|
| 104 | |
|---|
| 105 | for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) { |
|---|
| 106 | if (search->refcount == 0) { |
|---|
| 107 | if (search->exptime == 0 || search->exptime > current_time) { |
|---|
| 108 | STATS_LOCK(); |
|---|
| 109 | stats.evictions++; |
|---|
| 110 | STATS_UNLOCK(); |
|---|
| 111 | } |
|---|
| 112 | do_item_unlink(search); |
|---|
| 113 | break; |
|---|
| 114 | } |
|---|
| 115 | } |
|---|
| 116 | it = slabs_alloc(ntotal); |
|---|
| 117 | if (it == 0) return NULL; |
|---|
| 118 | } |
|---|
| 119 | |
|---|
| 120 | assert(it->slabs_clsid == 0); |
|---|
| 121 | |
|---|
| 122 | it->slabs_clsid = id; |
|---|
| 123 | |
|---|
| 124 | assert(it != heads[it->slabs_clsid]); |
|---|
| 125 | |
|---|
| 126 | it->next = it->prev = it->h_next = 0; |
|---|
| 127 | it->refcount = 1; /* the caller will have a reference */ |
|---|
| 128 | DEBUG_REFCNT(it, '*'); |
|---|
| 129 | it->it_flags = 0; |
|---|
| 130 | it->nkey = nkey; |
|---|
| 131 | it->nbytes = nbytes; |
|---|
| 132 | strcpy(ITEM_key(it), key); |
|---|
| 133 | it->exptime = exptime; |
|---|
| 134 | memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix); |
|---|
| 135 | it->nsuffix = nsuffix; |
|---|
| 136 | return it; |
|---|
| 137 | } |
|---|
| 138 | |
|---|
| 139 | void item_free(item *it) { |
|---|
| 140 | size_t ntotal = ITEM_ntotal(it); |
|---|
| 141 | assert((it->it_flags & ITEM_LINKED) == 0); |
|---|
| 142 | assert(it != heads[it->slabs_clsid]); |
|---|
| 143 | assert(it != tails[it->slabs_clsid]); |
|---|
| 144 | assert(it->refcount == 0); |
|---|
| 145 | |
|---|
| 146 | /* so slab size changer can tell later if item is already free or not */ |
|---|
| 147 | it->slabs_clsid = 0; |
|---|
| 148 | it->it_flags |= ITEM_SLABBED; |
|---|
| 149 | DEBUG_REFCNT(it, 'F'); |
|---|
| 150 | slabs_free(it, ntotal); |
|---|
| 151 | } |
|---|
| 152 | |
|---|
| 153 | /** |
|---|
| 154 | * Returns true if an item will fit in the cache (its size does not exceed |
|---|
| 155 | * the maximum for a cache entry.) |
|---|
| 156 | */ |
|---|
| 157 | bool item_size_ok(const size_t nkey, const int flags, const int nbytes) { |
|---|
| 158 | char prefix[40]; |
|---|
| 159 | uint8_t nsuffix; |
|---|
| 160 | |
|---|
| 161 | return slabs_clsid(item_make_header(nkey + 1, flags, nbytes, |
|---|
| 162 | prefix, &nsuffix)) != 0; |
|---|
| 163 | } |
|---|
| 164 | |
|---|
| 165 | static void item_link_q(item *it) { /* item is the new head */ |
|---|
| 166 | item **head, **tail; |
|---|
| 167 | /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */ |
|---|
| 168 | assert((it->it_flags & ITEM_SLABBED) == 0); |
|---|
| 169 | |
|---|
| 170 | head = &heads[it->slabs_clsid]; |
|---|
| 171 | tail = &tails[it->slabs_clsid]; |
|---|
| 172 | assert(it != *head); |
|---|
| 173 | assert((*head && *tail) || (*head == 0 && *tail == 0)); |
|---|
| 174 | it->prev = 0; |
|---|
| 175 | it->next = *head; |
|---|
| 176 | if (it->next) it->next->prev = it; |
|---|
| 177 | *head = it; |
|---|
| 178 | if (*tail == 0) *tail = it; |
|---|
| 179 | sizes[it->slabs_clsid]++; |
|---|
| 180 | return; |
|---|
| 181 | } |
|---|
| 182 | |
|---|
| 183 | static void item_unlink_q(item *it) { |
|---|
| 184 | item **head, **tail; |
|---|
| 185 | /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */ |
|---|
| 186 | head = &heads[it->slabs_clsid]; |
|---|
| 187 | tail = &tails[it->slabs_clsid]; |
|---|
| 188 | |
|---|
| 189 | if (*head == it) { |
|---|
| 190 | assert(it->prev == 0); |
|---|
| 191 | *head = it->next; |
|---|
| 192 | } |
|---|
| 193 | if (*tail == it) { |
|---|
| 194 | assert(it->next == 0); |
|---|
| 195 | *tail = it->prev; |
|---|
| 196 | } |
|---|
| 197 | assert(it->next != it); |
|---|
| 198 | assert(it->prev != it); |
|---|
| 199 | |
|---|
| 200 | if (it->next) it->next->prev = it->prev; |
|---|
| 201 | if (it->prev) it->prev->next = it->next; |
|---|
| 202 | sizes[it->slabs_clsid]--; |
|---|
| 203 | return; |
|---|
| 204 | } |
|---|
| 205 | |
|---|
| 206 | int do_item_link(item *it) { |
|---|
| 207 | assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0); |
|---|
| 208 | assert(it->nbytes < (1024 * 1024)); /* 1MB max size */ |
|---|
| 209 | it->it_flags |= ITEM_LINKED; |
|---|
| 210 | it->time = current_time; |
|---|
| 211 | assoc_insert(it); |
|---|
| 212 | |
|---|
| 213 | STATS_LOCK(); |
|---|
| 214 | stats.curr_bytes += ITEM_ntotal(it); |
|---|
| 215 | stats.curr_items += 1; |
|---|
| 216 | stats.total_items += 1; |
|---|
| 217 | STATS_UNLOCK(); |
|---|
| 218 | |
|---|
| 219 | item_link_q(it); |
|---|
| 220 | |
|---|
| 221 | return 1; |
|---|
| 222 | } |
|---|
| 223 | |
|---|
| 224 | void do_item_unlink(item *it) { |
|---|
| 225 | if ((it->it_flags & ITEM_LINKED) != 0) { |
|---|
| 226 | it->it_flags &= ~ITEM_LINKED; |
|---|
| 227 | STATS_LOCK(); |
|---|
| 228 | stats.curr_bytes -= ITEM_ntotal(it); |
|---|
| 229 | stats.curr_items -= 1; |
|---|
| 230 | STATS_UNLOCK(); |
|---|
| 231 | assoc_delete(ITEM_key(it), it->nkey); |
|---|
| 232 | item_unlink_q(it); |
|---|
| 233 | if (it->refcount == 0) item_free(it); |
|---|
| 234 | } |
|---|
| 235 | } |
|---|
| 236 | |
|---|
| 237 | void do_item_remove(item *it) { |
|---|
| 238 | assert((it->it_flags & ITEM_SLABBED) == 0); |
|---|
| 239 | if (it->refcount != 0) { |
|---|
| 240 | it->refcount--; |
|---|
| 241 | DEBUG_REFCNT(it, '-'); |
|---|
| 242 | } |
|---|
| 243 | assert((it->it_flags & ITEM_DELETED) == 0 || it->refcount != 0); |
|---|
| 244 | if (it->refcount == 0 && (it->it_flags & ITEM_LINKED) == 0) { |
|---|
| 245 | item_free(it); |
|---|
| 246 | } |
|---|
| 247 | } |
|---|
| 248 | |
|---|
| 249 | void do_item_update(item *it) { |
|---|
| 250 | if (it->time < current_time - ITEM_UPDATE_INTERVAL) { |
|---|
| 251 | assert((it->it_flags & ITEM_SLABBED) == 0); |
|---|
| 252 | |
|---|
| 253 | if (it->it_flags & ITEM_LINKED != 0) { |
|---|
| 254 | item_unlink_q(it); |
|---|
| 255 | it->time = current_time; |
|---|
| 256 | item_link_q(it); |
|---|
| 257 | } |
|---|
| 258 | } |
|---|
| 259 | } |
|---|
| 260 | |
|---|
| 261 | int do_item_replace(item *it, item *new_it) { |
|---|
| 262 | assert((it->it_flags & ITEM_SLABBED) == 0); |
|---|
| 263 | |
|---|
| 264 | do_item_unlink(it); |
|---|
| 265 | return do_item_link(new_it); |
|---|
| 266 | } |
|---|
| 267 | |
|---|
| 268 | /*@null@*/ |
|---|
| 269 | char *do_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) { |
|---|
| 270 | unsigned int memlimit = 2 * 1024 * 1024; /* 2MB max response size */ |
|---|
| 271 | char *buffer; |
|---|
| 272 | unsigned int bufcurr; |
|---|
| 273 | item *it; |
|---|
| 274 | unsigned int len; |
|---|
| 275 | unsigned int shown = 0; |
|---|
| 276 | char temp[512]; |
|---|
| 277 | |
|---|
| 278 | if (slabs_clsid > LARGEST_ID) return NULL; |
|---|
| 279 | it = heads[slabs_clsid]; |
|---|
| 280 | |
|---|
| 281 | buffer = malloc((size_t)memlimit); |
|---|
| 282 | if (buffer == 0) return NULL; |
|---|
| 283 | bufcurr = 0; |
|---|
| 284 | |
|---|
| 285 | while (it != NULL && (limit == 0 || shown < limit)) { |
|---|
| 286 | len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %lu s]\r\n", ITEM_key(it), it->nbytes - 2, it->exptime + stats.started); |
|---|
| 287 | if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */ |
|---|
| 288 | break; |
|---|
| 289 | strcpy(buffer + bufcurr, temp); |
|---|
| 290 | bufcurr += len; |
|---|
| 291 | shown++; |
|---|
| 292 | it = it->next; |
|---|
| 293 | } |
|---|
| 294 | |
|---|
| 295 | memcpy(buffer + bufcurr, "END\r\n", 6); |
|---|
| 296 | bufcurr += 5; |
|---|
| 297 | |
|---|
| 298 | *bytes = bufcurr; |
|---|
| 299 | return buffer; |
|---|
| 300 | } |
|---|
| 301 | |
|---|
| 302 | char *do_item_stats(int *bytes) { |
|---|
| 303 | size_t bufleft = (size_t) LARGEST_ID * 80; |
|---|
| 304 | char *buffer = malloc(bufleft); |
|---|
| 305 | char *bufcurr = buffer; |
|---|
| 306 | rel_time_t now = current_time; |
|---|
| 307 | int i; |
|---|
| 308 | int linelen; |
|---|
| 309 | |
|---|
| 310 | if (buffer == NULL) { |
|---|
| 311 | return NULL; |
|---|
| 312 | } |
|---|
| 313 | |
|---|
| 314 | for (i = 0; i < LARGEST_ID; i++) { |
|---|
| 315 | if (tails[i] != NULL) { |
|---|
| 316 | linelen = snprintf(bufcurr, bufleft, "STAT items:%d:number %u\r\nSTAT items:%d:age %u\r\n", |
|---|
| 317 | i, sizes[i], i, now - tails[i]->time); |
|---|
| 318 | if (linelen + sizeof("END\r\n") < bufleft) { |
|---|
| 319 | bufcurr += linelen; |
|---|
| 320 | bufleft -= linelen; |
|---|
| 321 | } |
|---|
| 322 | else { |
|---|
| 323 | /* The caller didn't allocate enough buffer space. */ |
|---|
| 324 | break; |
|---|
| 325 | } |
|---|
| 326 | } |
|---|
| 327 | } |
|---|
| 328 | memcpy(bufcurr, "END\r\n", 6); |
|---|
| 329 | bufcurr += 5; |
|---|
| 330 | |
|---|
| 331 | *bytes = bufcurr - buffer; |
|---|
| 332 | return buffer; |
|---|
| 333 | } |
|---|
| 334 | |
|---|
| 335 | /** dumps out a list of objects of each size, with granularity of 32 bytes */ |
|---|
| 336 | /*@null@*/ |
|---|
| 337 | char* do_item_stats_sizes(int *bytes) { |
|---|
| 338 | const int num_buckets = 32768; /* max 1MB object, divided into 32 bytes size buckets */ |
|---|
| 339 | unsigned int *histogram = (unsigned int *)malloc((size_t)num_buckets * sizeof(int)); |
|---|
| 340 | char *buf = (char *)malloc(2 * 1024 * 1024); /* 2MB max response size */ |
|---|
| 341 | int i; |
|---|
| 342 | |
|---|
| 343 | if (histogram == 0 || buf == 0) { |
|---|
| 344 | if (histogram) free(histogram); |
|---|
| 345 | if (buf) free(buf); |
|---|
| 346 | return NULL; |
|---|
| 347 | } |
|---|
| 348 | |
|---|
| 349 | /* build the histogram */ |
|---|
| 350 | memset(histogram, 0, (size_t)num_buckets * sizeof(int)); |
|---|
| 351 | for (i = 0; i < LARGEST_ID; i++) { |
|---|
| 352 | item *iter = heads[i]; |
|---|
| 353 | while (iter) { |
|---|
| 354 | int ntotal = ITEM_ntotal(iter); |
|---|
| 355 | int bucket = ntotal / 32; |
|---|
| 356 | if ((ntotal % 32) != 0) bucket++; |
|---|
| 357 | if (bucket < num_buckets) histogram[bucket]++; |
|---|
| 358 | iter = iter->next; |
|---|
| 359 | } |
|---|
| 360 | } |
|---|
| 361 | |
|---|
| 362 | /* write the buffer */ |
|---|
| 363 | *bytes = 0; |
|---|
| 364 | for (i = 0; i < num_buckets; i++) { |
|---|
| 365 | if (histogram[i] != 0) { |
|---|
| 366 | *bytes += sprintf(&buf[*bytes], "%d %u\r\n", i * 32, histogram[i]); |
|---|
| 367 | } |
|---|
| 368 | } |
|---|
| 369 | *bytes += sprintf(&buf[*bytes], "END\r\n"); |
|---|
| 370 | free(histogram); |
|---|
| 371 | return buf; |
|---|
| 372 | } |
|---|
| 373 | |
|---|
| 374 | /** returns true if a deleted item's delete-locked-time is over, and it |
|---|
| 375 | should be removed from the namespace */ |
|---|
| 376 | bool item_delete_lock_over (item *it) { |
|---|
| 377 | assert(it->it_flags & ITEM_DELETED); |
|---|
| 378 | return (current_time >= it->exptime); |
|---|
| 379 | } |
|---|
| 380 | |
|---|
| 381 | /** wrapper around assoc_find which does the lazy expiration/deletion logic */ |
|---|
| 382 | item *do_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked) { |
|---|
| 383 | item *it = assoc_find(key, nkey); |
|---|
| 384 | if (delete_locked) *delete_locked = false; |
|---|
| 385 | if (it != NULL && (it->it_flags & ITEM_DELETED)) { |
|---|
| 386 | /* it's flagged as delete-locked. let's see if that condition |
|---|
| 387 | is past due, and the 5-second delete_timer just hasn't |
|---|
| 388 | gotten to it yet... */ |
|---|
| 389 | if (!item_delete_lock_over(it)) { |
|---|
| 390 | if (delete_locked) *delete_locked = true; |
|---|
| 391 | it = NULL; |
|---|
| 392 | } |
|---|
| 393 | } |
|---|
| 394 | if (it != NULL && settings.oldest_live != 0 && settings.oldest_live <= current_time && |
|---|
| 395 | it->time <= settings.oldest_live) { |
|---|
| 396 | do_item_unlink(it); /* MTSAFE - cache_lock held */ |
|---|
| 397 | it = NULL; |
|---|
| 398 | } |
|---|
| 399 | if (it != NULL && it->exptime != 0 && it->exptime <= current_time) { |
|---|
| 400 | do_item_unlink(it); /* MTSAFE - cache_lock held */ |
|---|
| 401 | it = NULL; |
|---|
| 402 | } |
|---|
| 403 | |
|---|
| 404 | if (it != NULL) { |
|---|
| 405 | it->refcount++; |
|---|
| 406 | DEBUG_REFCNT(it, '+'); |
|---|
| 407 | } |
|---|
| 408 | return it; |
|---|
| 409 | } |
|---|
| 410 | |
|---|
| 411 | item *item_get(const char *key, const size_t nkey) { |
|---|
| 412 | return item_get_notedeleted(key, nkey, 0); |
|---|
| 413 | } |
|---|
| 414 | |
|---|
| 415 | /** returns an item whether or not it's delete-locked or expired. */ |
|---|
| 416 | item *do_item_get_nocheck(const char *key, const size_t nkey) { |
|---|
| 417 | item *it = assoc_find(key, nkey); |
|---|
| 418 | if (it) { |
|---|
| 419 | it->refcount++; |
|---|
| 420 | DEBUG_REFCNT(it, '+'); |
|---|
| 421 | } |
|---|
| 422 | return it; |
|---|
| 423 | } |
|---|
| 424 | |
|---|
| 425 | /* expires items that are more recent than the oldest_live setting. */ |
|---|
| 426 | void do_item_flush_expired(void) { |
|---|
| 427 | int i; |
|---|
| 428 | item *iter, *next; |
|---|
| 429 | if (settings.oldest_live == 0) |
|---|
| 430 | return; |
|---|
| 431 | for (i = 0; i < LARGEST_ID; i++) { |
|---|
| 432 | /* The LRU is sorted in decreasing time order, and an item's timestamp |
|---|
| 433 | * is never newer than its last access time, so we only need to walk |
|---|
| 434 | * back until we hit an item older than the oldest_live time. |
|---|
| 435 | * The oldest_live checking will auto-expire the remaining items. |
|---|
| 436 | */ |
|---|
| 437 | for (iter = heads[i]; iter != NULL; iter = next) { |
|---|
| 438 | if (iter->time >= settings.oldest_live) { |
|---|
| 439 | next = iter->next; |
|---|
| 440 | if ((iter->it_flags & ITEM_SLABBED) == 0) { |
|---|
| 441 | do_item_unlink(iter); |
|---|
| 442 | } |
|---|
| 443 | } else { |
|---|
| 444 | /* We've hit the first old item. Continue to the next queue. */ |
|---|
| 445 | break; |
|---|
| 446 | } |
|---|
| 447 | } |
|---|
| 448 | } |
|---|
| 449 | } |
|---|