root/trunk/server/slabs.c @ 738

Revision 738, 12.5 kB (checked in by dormando, 21 months ago)

Don't re-calculate the slab class id.
slabs_alloc() internally calls slabs_clsid(), so an eviction case would crawl the list of slab classes three times.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
4 * and are divided into chunks. The chunk sizes start off at the size of the
5 * "item" structure plus space for a small key and value. They increase by
6 * a multiplier factor from there, up to half the maximum slab size. The last
7 * slab size is always 1MB, since that's the maximum item size allowed by the
8 * memcached protocol.
9 *
10 * $Id$
11 */
12#include "memcached.h"
13#include <sys/stat.h>
14#include <sys/socket.h>
15#include <sys/signal.h>
16#include <sys/resource.h>
17#include <fcntl.h>
18#include <netinet/in.h>
19#include <errno.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <string.h>
23#include <assert.h>
24
25#define POWER_SMALLEST 1
26#define POWER_LARGEST  200
27#define POWER_BLOCK 1048576
28#define CHUNK_ALIGN_BYTES 8
29#define DONT_PREALLOC_SLABS
30
31/* powers-of-N allocation structures */
32
33typedef struct {
34    unsigned int size;      /* sizes of items */
35    unsigned int perslab;   /* how many items per slab */
36
37    void **slots;           /* list of item ptrs */
38    unsigned int sl_total;  /* size of previous array */
39    unsigned int sl_curr;   /* first free slot */
40
41    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */
42    unsigned int end_page_free; /* number of items remaining at end of last alloced page */
43
44    unsigned int slabs;     /* how many slabs were allocated for this class */
45
46    void **slab_list;       /* array of slab pointers */
47    unsigned int list_size; /* size of prev array */
48
49    unsigned int killing;  /* index+1 of dying slab, or zero if none */
50} slabclass_t;
51
52static slabclass_t slabclass[POWER_LARGEST + 1];
53static size_t mem_limit = 0;
54static size_t mem_malloced = 0;
55static int power_largest;
56
57static void *mem_base = NULL;
58static void *mem_current = NULL;
59static size_t mem_avail = 0;
60
61/*
62 * Forward Declarations
63 */
64static int do_slabs_newslab(const unsigned int id);
65static void *memory_allocate(size_t size);
66
67#ifndef DONT_PREALLOC_SLABS
68/* Preallocate as many slab pages as possible (called from slabs_init)
69   on start-up, so users don't get confused out-of-memory errors when
70   they do have free (in-slab) space, but no space to make new slabs.
71   if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
72   slab types can be made.  if max memory is less than 18 MB, only the
73   smaller ones will be made.  */
74static void slabs_preallocate (const unsigned int maxslabs);
75#endif
76
77/*
78 * Figures out which slab class (chunk size) is required to store an item of
79 * a given size.
80 *
81 * Given object size, return id to use when allocating/freeing memory for object
82 * 0 means error: can't store such a large object
83 */
84
85unsigned int slabs_clsid(const size_t size) {
86    int res = POWER_SMALLEST;
87
88    if (size == 0)
89        return 0;
90    while (size > slabclass[res].size)
91        if (res++ == power_largest)     /* won't fit in the biggest slab */
92            return 0;
93    return res;
94}
95
96/**
97 * Determines the chunk sizes and initializes the slab class descriptors
98 * accordingly.
99 */
100void slabs_init(const size_t limit, const double factor, const bool prealloc) {
101    int i = POWER_SMALLEST - 1;
102    unsigned int size = sizeof(item) + settings.chunk_size;
103
104    /* Factor of 2.0 means use the default memcached behavior */
105    if (factor == 2.0 && size < 128)
106        size = 128;
107
108    mem_limit = limit;
109
110    if (prealloc) {
111        /* Allocate everything in a big chunk with malloc */
112        mem_base = malloc(mem_limit);
113        if (mem_base != NULL) {
114            mem_current = mem_base;
115            mem_avail = mem_limit;
116        } else {
117            fprintf(stderr, "Warning: Failed to allocate requested memory in"
118                    " one large chunk.\nWill allocate in smaller chunks\n");
119        }
120    }
121
122    memset(slabclass, 0, sizeof(slabclass));
123
124    while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
125        /* Make sure items are always n-byte aligned */
126        if (size % CHUNK_ALIGN_BYTES)
127            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
128
129        slabclass[i].size = size;
130        slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;
131        size *= factor;
132        if (settings.verbose > 1) {
133            fprintf(stderr, "slab class %3d: chunk size %6u perslab %5u\n",
134                    i, slabclass[i].size, slabclass[i].perslab);
135        }
136    }
137
138    power_largest = i;
139    slabclass[power_largest].size = POWER_BLOCK;
140    slabclass[power_largest].perslab = 1;
141
142    /* for the test suite:  faking of how much we've already malloc'd */
143    {
144        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
145        if (t_initial_malloc) {
146            mem_malloced = (size_t)atol(t_initial_malloc);
147        }
148
149    }
150
151#ifndef DONT_PREALLOC_SLABS
152    {
153        char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");
154
155        if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
156            slabs_preallocate(power_largest);
157        }
158    }
159#endif
160}
161
162#ifndef DONT_PREALLOC_SLABS
163static void slabs_preallocate (const unsigned int maxslabs) {
164    int i;
165    unsigned int prealloc = 0;
166
167    /* pre-allocate a 1MB slab in every size class so people don't get
168       confused by non-intuitive "SERVER_ERROR out of memory"
169       messages.  this is the most common question on the mailing
170       list.  if you really don't want this, you can rebuild without
171       these three lines.  */
172
173    for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
174        if (++prealloc > maxslabs)
175            return;
176        do_slabs_newslab(i);
177    }
178
179}
180#endif
181
182static int grow_slab_list (const unsigned int id) {
183    slabclass_t *p = &slabclass[id];
184    if (p->slabs == p->list_size) {
185        size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;
186        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
187        if (new_list == 0) return 0;
188        p->list_size = new_size;
189        p->slab_list = new_list;
190    }
191    return 1;
192}
193
194static int do_slabs_newslab(const unsigned int id) {
195    slabclass_t *p = &slabclass[id];
196#ifdef ALLOW_SLABS_REASSIGN
197    int len = POWER_BLOCK;
198#else
199    int len = p->size * p->perslab;
200#endif
201    char *ptr;
202
203    if (mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)
204        return 0;
205
206    if (grow_slab_list(id) == 0) return 0;
207
208    ptr = memory_allocate((size_t)len);
209    if (ptr == 0) return 0;
210
211    memset(ptr, 0, (size_t)len);
212    p->end_page_ptr = ptr;
213    p->end_page_free = p->perslab;
214
215    p->slab_list[p->slabs++] = ptr;
216    mem_malloced += len;
217    return 1;
218}
219
220/*@null@*/
221void *do_slabs_alloc(const size_t size, unsigned int id) {
222    slabclass_t *p;
223
224    if (id < POWER_SMALLEST || id > power_largest)
225        return NULL;
226
227    p = &slabclass[id];
228    assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);
229
230#ifdef USE_SYSTEM_MALLOC
231    if (mem_limit && mem_malloced + size > mem_limit)
232        return 0;
233    mem_malloced += size;
234    return malloc(size);
235#endif
236
237    /* fail unless we have space at the end of a recently allocated page,
238       we have something on our freelist, or we could allocate a new page */
239    if (! (p->end_page_ptr != 0 || p->sl_curr != 0 || do_slabs_newslab(id) != 0))
240        return 0;
241
242    /* return off our freelist, if we have one */
243    if (p->sl_curr != 0)
244        return p->slots[--p->sl_curr];
245
246    /* if we recently allocated a whole page, return from that */
247    if (p->end_page_ptr) {
248        void *ptr = p->end_page_ptr;
249        if (--p->end_page_free != 0) {
250            p->end_page_ptr += p->size;
251        } else {
252            p->end_page_ptr = 0;
253        }
254        return ptr;
255    }
256
257    return NULL;  /* shouldn't ever get here */
258}
259
260void do_slabs_free(void *ptr, const size_t size) {
261    unsigned char id = slabs_clsid(size);
262    slabclass_t *p;
263
264    assert(((item *)ptr)->slabs_clsid == 0);
265    assert(id >= POWER_SMALLEST && id <= power_largest);
266    if (id < POWER_SMALLEST || id > power_largest)
267        return;
268
269    p = &slabclass[id];
270
271#ifdef USE_SYSTEM_MALLOC
272    mem_malloced -= size;
273    free(ptr);
274    return;
275#endif
276
277    if (p->sl_curr == p->sl_total) { /* need more space on the free list */
278        int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16;  /* 16 is arbitrary */
279        void **new_slots = realloc(p->slots, new_size * sizeof(void *));
280        if (new_slots == 0)
281            return;
282        p->slots = new_slots;
283        p->sl_total = new_size;
284    }
285    p->slots[p->sl_curr++] = ptr;
286    return;
287}
288
289/*@null@*/
290char* do_slabs_stats(int *buflen) {
291    int i, total;
292    char *buf = (char *)malloc(power_largest * 200 + 100);
293    char *bufcurr = buf;
294
295    *buflen = 0;
296    if (buf == NULL) return NULL;
297
298    total = 0;
299    for(i = POWER_SMALLEST; i <= power_largest; i++) {
300        slabclass_t *p = &slabclass[i];
301        if (p->slabs != 0) {
302            unsigned int perslab, slabs;
303
304            slabs = p->slabs;
305            perslab = p->perslab;
306
307            bufcurr += sprintf(bufcurr, "STAT %d:chunk_size %u\r\n", i, p->size);
308            bufcurr += sprintf(bufcurr, "STAT %d:chunks_per_page %u\r\n", i, perslab);
309            bufcurr += sprintf(bufcurr, "STAT %d:total_pages %u\r\n", i, slabs);
310            bufcurr += sprintf(bufcurr, "STAT %d:total_chunks %u\r\n", i, slabs*perslab);
311            bufcurr += sprintf(bufcurr, "STAT %d:used_chunks %u\r\n", i, slabs*perslab - p->sl_curr);
312            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks %u\r\n", i, p->sl_curr);
313            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks_end %u\r\n", i, p->end_page_free);
314            total++;
315        }
316    }
317    bufcurr += sprintf(bufcurr, "STAT active_slabs %d\r\nSTAT total_malloced %llu\r\n", total, (unsigned long long)mem_malloced);
318    bufcurr += sprintf(bufcurr, "END\r\n");
319    *buflen = bufcurr - buf;
320    return buf;
321}
322
323#ifdef ALLOW_SLABS_REASSIGN
324/* Blows away all the items in a slab class and moves its slabs to another
325   class. This is only used by the "slabs reassign" command, for manual tweaking
326   of memory allocation. It's disabled by default since it requires that all
327   slabs be the same size (which can waste space for chunk size mantissas of
328   other than 2.0).
329   1 = success
330   0 = fail
331   -1 = tried. busy. send again shortly. */
332int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
333    void *slab, *slab_end;
334    slabclass_t *p, *dp;
335    void *iter;
336    bool was_busy = false;
337
338    if (srcid < POWER_SMALLEST || srcid > power_largest ||
339        dstid < POWER_SMALLEST || dstid > power_largest)
340        return 0;
341
342    p = &slabclass[srcid];
343    dp = &slabclass[dstid];
344
345    /* fail if src still populating, or no slab to give up in src */
346    if (p->end_page_ptr || ! p->slabs)
347        return 0;
348
349    /* fail if dst is still growing or we can't make room to hold its new one */
350    if (dp->end_page_ptr || ! grow_slab_list(dstid))
351        return 0;
352
353    if (p->killing == 0) p->killing = 1;
354
355    slab = p->slab_list[p->killing - 1];
356    slab_end = (char*)slab + POWER_BLOCK;
357
358    for (iter = slab; iter < slab_end; (char*)iter += p->size) {
359        item *it = (item *)iter;
360        if (it->slabs_clsid) {
361            if (it->refcount) was_busy = true;
362            item_unlink(it);
363        }
364    }
365
366    /* go through free list and discard items that are no longer part of this slab */
367    {
368        int fi;
369        for (fi = p->sl_curr - 1; fi >= 0; fi--) {
370            if (p->slots[fi] >= slab && p->slots[fi] < slab_end) {
371                p->sl_curr--;
372                if (p->sl_curr > fi) p->slots[fi] = p->slots[p->sl_curr];
373            }
374        }
375    }
376
377    if (was_busy) return -1;
378
379    /* if good, now move it to the dst slab class */
380    p->slab_list[p->killing - 1] = p->slab_list[p->slabs - 1];
381    p->slabs--;
382    p->killing = 0;
383    dp->slab_list[dp->slabs++] = slab;
384    dp->end_page_ptr = slab;
385    dp->end_page_free = dp->perslab;
386    /* this isn't too critical, but other parts of the code do asserts to
387       make sure this field is always 0.  */
388    for (iter = slab; iter < slab_end; (char*)iter += dp->size) {
389        ((item *)iter)->slabs_clsid = 0;
390    }
391    return 1;
392}
393#endif
394
395static void *memory_allocate(size_t size) {
396    void *ret;
397
398    if (mem_base == NULL) {
399        /* We are not using a preallocated large memory chunk */
400        ret = malloc(size);
401    } else {
402        ret = mem_current;
403
404        if (size > mem_avail) {
405            return NULL;
406        }
407
408        /* mem_current pointer _must_ be aligned!!! */
409        if (size % CHUNK_ALIGN_BYTES) {
410            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
411        }
412
413        mem_current += size;
414        if (size < mem_avail) {
415            mem_avail -= size;
416        } else {
417            mem_avail = 0;
418        }
419    }
420
421    return ret;
422}
Note: See TracBrowser for help on using the browser.