root/branches/binary/server/slabs.c @ 745

Revision 745, 12.5 kB (checked in by dsallings, 21 months ago)

Merged commit 'trunk' into lbinary as of r744

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
4 * and are divided into chunks. The chunk sizes start off at the size of the
5 * "item" structure plus space for a small key and value. They increase by
6 * a multiplier factor from there, up to half the maximum slab size. The last
7 * slab size is always 1MB, since that's the maximum item size allowed by the
8 * memcached protocol.
9 *
10 * $Id$
11 */
12#include "memcached.h"
13#include <sys/stat.h>
14#include <sys/socket.h>
15#include <sys/signal.h>
16#include <sys/resource.h>
17#include <fcntl.h>
18#include <netinet/in.h>
19#include <errno.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <string.h>
23#include <assert.h>
24
25#define POWER_SMALLEST 1
26#define POWER_LARGEST  200
27#define POWER_BLOCK 1048576
28#define CHUNK_ALIGN_BYTES 8
29#define DONT_PREALLOC_SLABS
30
31/* powers-of-N allocation structures */
32
33typedef struct {
34    unsigned int size;      /* sizes of items */
35    unsigned int perslab;   /* how many items per slab */
36
37    void **slots;           /* list of item ptrs */
38    unsigned int sl_total;  /* size of previous array */
39    unsigned int sl_curr;   /* first free slot */
40
41    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */
42    unsigned int end_page_free; /* number of items remaining at end of last alloced page */
43
44    unsigned int slabs;     /* how many slabs were allocated for this class */
45
46    void **slab_list;       /* array of slab pointers */
47    unsigned int list_size; /* size of prev array */
48
49    unsigned int killing;  /* index+1 of dying slab, or zero if none */
50} slabclass_t;
51
52static slabclass_t slabclass[POWER_LARGEST + 1];
53static size_t mem_limit = 0;
54static size_t mem_malloced = 0;
55static int power_largest;
56
57static void *mem_base = NULL;
58static void *mem_current = NULL;
59static size_t mem_avail = 0;
60
61/*
62 * Forward Declarations
63 */
64static int do_slabs_newslab(const unsigned int id);
65static void *memory_allocate(size_t size);
66
67#ifndef DONT_PREALLOC_SLABS
68/* Preallocate as many slab pages as possible (called from slabs_init)
69   on start-up, so users don't get confused out-of-memory errors when
70   they do have free (in-slab) space, but no space to make new slabs.
71   if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
72   slab types can be made.  if max memory is less than 18 MB, only the
73   smaller ones will be made.  */
74static void slabs_preallocate (const unsigned int maxslabs);
75#endif
76
77/*
78 * Figures out which slab class (chunk size) is required to store an item of
79 * a given size.
80 *
81 * Given object size, return id to use when allocating/freeing memory for object
82 * 0 means error: can't store such a large object
83 */
84
85unsigned int slabs_clsid(const size_t size) {
86    int res = POWER_SMALLEST;
87
88    if (size == 0)
89        return 0;
90    while (size > slabclass[res].size)
91        if (res++ == power_largest)     /* won't fit in the biggest slab */
92            return 0;
93    return res;
94}
95
96/**
97 * Determines the chunk sizes and initializes the slab class descriptors
98 * accordingly.
99 */
100void slabs_init(const size_t limit, const double factor, const bool prealloc) {
101    int i = POWER_SMALLEST - 1;
102    unsigned int size = sizeof(item) + settings.chunk_size;
103
104    /* Factor of 2.0 means use the default memcached behavior */
105    if (factor == 2.0 && size < 128)
106        size = 128;
107
108    mem_limit = limit;
109
110    if (prealloc) {
111        /* Allocate everything in a big chunk with malloc */
112        mem_base = malloc(mem_limit);
113        if (mem_base != NULL) {
114            mem_current = mem_base;
115            mem_avail = mem_limit;
116        } else {
117            fprintf(stderr, "Warning: Failed to allocate requested memory in"
118                    " one large chunk.\nWill allocate in smaller chunks\n");
119        }
120    }
121
122    memset(slabclass, 0, sizeof(slabclass));
123
124    while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
125        /* Make sure items are always n-byte aligned */
126        if (size % CHUNK_ALIGN_BYTES)
127            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
128
129        slabclass[i].size = size;
130        slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;
131        size *= factor;
132        if (settings.verbose > 1) {
133            fprintf(stderr, "slab class %3d: chunk size %6u perslab %5u\n",
134                    i, slabclass[i].size, slabclass[i].perslab);
135        }
136    }
137
138    power_largest = i;
139    slabclass[power_largest].size = POWER_BLOCK;
140    slabclass[power_largest].perslab = 1;
141
142    /* for the test suite:  faking of how much we've already malloc'd */
143    {
144        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
145        if (t_initial_malloc) {
146            mem_malloced = (size_t)atol(t_initial_malloc);
147        }
148
149    }
150
151#ifndef DONT_PREALLOC_SLABS
152    {
153        char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");
154
155        if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
156            slabs_preallocate(power_largest);
157        }
158    }
159#endif
160}
161
162#ifndef DONT_PREALLOC_SLABS
163static void slabs_preallocate (const unsigned int maxslabs) {
164    int i;
165    unsigned int prealloc = 0;
166
167    /* pre-allocate a 1MB slab in every size class so people don't get
168       confused by non-intuitive "SERVER_ERROR out of memory"
169       messages.  this is the most common question on the mailing
170       list.  if you really don't want this, you can rebuild without
171       these three lines.  */
172
173    for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
174        if (++prealloc > maxslabs)
175            return;
176        do_slabs_newslab(i);
177    }
178
179}
180#endif
181
182static int grow_slab_list (const unsigned int id) {
183    slabclass_t *p = &slabclass[id];
184    if (p->slabs == p->list_size) {
185        size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;
186        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
187        if (new_list == 0) return 0;
188        p->list_size = new_size;
189        p->slab_list = new_list;
190    }
191    return 1;
192}
193
194static int do_slabs_newslab(const unsigned int id) {
195    slabclass_t *p = &slabclass[id];
196#ifdef ALLOW_SLABS_REASSIGN
197    int len = POWER_BLOCK;
198#else
199    int len = p->size * p->perslab;
200#endif
201    char *ptr;
202
203    if (mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)
204        return 0;
205
206    if (grow_slab_list(id) == 0) return 0;
207
208    ptr = memory_allocate((size_t)len);
209    if (ptr == 0) return 0;
210
211    memset(ptr, 0, (size_t)len);
212    p->end_page_ptr = ptr;
213    p->end_page_free = p->perslab;
214
215    p->slab_list[p->slabs++] = ptr;
216    mem_malloced += len;
217    return 1;
218}
219
220/*@null@*/
221void *do_slabs_alloc(const size_t size, unsigned int id) {
222    slabclass_t *p;
223
224    if (id < POWER_SMALLEST || id > power_largest)
225        return NULL;
226
227    p = &slabclass[id];
228    assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);
229
230#ifdef USE_SYSTEM_MALLOC
231    if (mem_limit && mem_malloced + size > mem_limit)
232        return 0;
233    mem_malloced += size;
234    return malloc(size);
235#endif
236
237    /* fail unless we have space at the end of a recently allocated page,
238       we have something on our freelist, or we could allocate a new page */
239    if (! (p->end_page_ptr != 0 || p->sl_curr != 0 || do_slabs_newslab(id) != 0))
240        return 0;
241
242    /* return off our freelist, if we have one */
243    if (p->sl_curr != 0)
244        return p->slots[--p->sl_curr];
245
246    /* if we recently allocated a whole page, return from that */
247    if (p->end_page_ptr) {
248        void *ptr = p->end_page_ptr;
249        if (--p->end_page_free != 0) {
250            p->end_page_ptr += p->size;
251        } else {
252            p->end_page_ptr = 0;
253        }
254        return ptr;
255    }
256
257    return NULL;  /* shouldn't ever get here */
258}
259
260void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
261    slabclass_t *p;
262
263    assert(((item *)ptr)->slabs_clsid == 0);
264    assert(id >= POWER_SMALLEST && id <= power_largest);
265    if (id < POWER_SMALLEST || id > power_largest)
266        return;
267
268    p = &slabclass[id];
269
270#ifdef USE_SYSTEM_MALLOC
271    mem_malloced -= size;
272    free(ptr);
273    return;
274#endif
275
276    if (p->sl_curr == p->sl_total) { /* need more space on the free list */
277        int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16;  /* 16 is arbitrary */
278        void **new_slots = realloc(p->slots, new_size * sizeof(void *));
279        if (new_slots == 0)
280            return;
281        p->slots = new_slots;
282        p->sl_total = new_size;
283    }
284    p->slots[p->sl_curr++] = ptr;
285    return;
286}
287
288/*@null@*/
289char* do_slabs_stats(int *buflen) {
290    int i, total;
291    char *buf = (char *)malloc(power_largest * 200 + 100);
292    char *bufcurr = buf;
293
294    *buflen = 0;
295    if (buf == NULL) return NULL;
296
297    total = 0;
298    for(i = POWER_SMALLEST; i <= power_largest; i++) {
299        slabclass_t *p = &slabclass[i];
300        if (p->slabs != 0) {
301            unsigned int perslab, slabs;
302
303            slabs = p->slabs;
304            perslab = p->perslab;
305
306            bufcurr += sprintf(bufcurr, "STAT %d:chunk_size %u\r\n", i, p->size);
307            bufcurr += sprintf(bufcurr, "STAT %d:chunks_per_page %u\r\n", i, perslab);
308            bufcurr += sprintf(bufcurr, "STAT %d:total_pages %u\r\n", i, slabs);
309            bufcurr += sprintf(bufcurr, "STAT %d:total_chunks %u\r\n", i, slabs*perslab);
310            bufcurr += sprintf(bufcurr, "STAT %d:used_chunks %u\r\n", i, slabs*perslab - p->sl_curr);
311            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks %u\r\n", i, p->sl_curr);
312            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks_end %u\r\n", i, p->end_page_free);
313            total++;
314        }
315    }
316    bufcurr += sprintf(bufcurr, "STAT active_slabs %d\r\nSTAT total_malloced %llu\r\n", total, (unsigned long long)mem_malloced);
317    bufcurr += sprintf(bufcurr, "END\r\n");
318    *buflen = bufcurr - buf;
319    return buf;
320}
321
322#ifdef ALLOW_SLABS_REASSIGN
323/* Blows away all the items in a slab class and moves its slabs to another
324   class. This is only used by the "slabs reassign" command, for manual tweaking
325   of memory allocation. It's disabled by default since it requires that all
326   slabs be the same size (which can waste space for chunk size mantissas of
327   other than 2.0).
328   1 = success
329   0 = fail
330   -1 = tried. busy. send again shortly. */
331int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
332    void *slab, *slab_end;
333    slabclass_t *p, *dp;
334    void *iter;
335    bool was_busy = false;
336
337    if (srcid < POWER_SMALLEST || srcid > power_largest ||
338        dstid < POWER_SMALLEST || dstid > power_largest)
339        return 0;
340
341    p = &slabclass[srcid];
342    dp = &slabclass[dstid];
343
344    /* fail if src still populating, or no slab to give up in src */
345    if (p->end_page_ptr || ! p->slabs)
346        return 0;
347
348    /* fail if dst is still growing or we can't make room to hold its new one */
349    if (dp->end_page_ptr || ! grow_slab_list(dstid))
350        return 0;
351
352    if (p->killing == 0) p->killing = 1;
353
354    slab = p->slab_list[p->killing - 1];
355    slab_end = (char*)slab + POWER_BLOCK;
356
357    for (iter = slab; iter < slab_end; (char*)iter += p->size) {
358        item *it = (item *)iter;
359        if (it->slabs_clsid) {
360            if (it->refcount) was_busy = true;
361            item_unlink(it);
362        }
363    }
364
365    /* go through free list and discard items that are no longer part of this slab */
366    {
367        int fi;
368        for (fi = p->sl_curr - 1; fi >= 0; fi--) {
369            if (p->slots[fi] >= slab && p->slots[fi] < slab_end) {
370                p->sl_curr--;
371                if (p->sl_curr > fi) p->slots[fi] = p->slots[p->sl_curr];
372            }
373        }
374    }
375
376    if (was_busy) return -1;
377
378    /* if good, now move it to the dst slab class */
379    p->slab_list[p->killing - 1] = p->slab_list[p->slabs - 1];
380    p->slabs--;
381    p->killing = 0;
382    dp->slab_list[dp->slabs++] = slab;
383    dp->end_page_ptr = slab;
384    dp->end_page_free = dp->perslab;
385    /* this isn't too critical, but other parts of the code do asserts to
386       make sure this field is always 0.  */
387    for (iter = slab; iter < slab_end; (char*)iter += dp->size) {
388        ((item *)iter)->slabs_clsid = 0;
389    }
390    return 1;
391}
392#endif
393
394static void *memory_allocate(size_t size) {
395    void *ret;
396
397    if (mem_base == NULL) {
398        /* We are not using a preallocated large memory chunk */
399        ret = malloc(size);
400    } else {
401        ret = mem_current;
402
403        if (size > mem_avail) {
404            return NULL;
405        }
406
407        /* mem_current pointer _must_ be aligned!!! */
408        if (size % CHUNK_ALIGN_BYTES) {
409            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
410        }
411
412        mem_current += size;
413        if (size < mem_avail) {
414            mem_avail -= size;
415        } else {
416            mem_avail = 0;
417        }
418    }
419
420    return ret;
421}
Note: See TracBrowser for help on using the browser.