root/branches/append/server/slabs.c

Revision 591, 11.4 kB (checked in by plindner, 2 years ago)

gcc -pedantic changes, comments, signed/unsigned changes. also convert expanded to bool

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
4 * and are divided into chunks. The chunk sizes start off at the size of the
5 * "item" structure plus space for a small key and value. They increase by
6 * a multiplier factor from there, up to half the maximum slab size. The last
7 * slab size is always 1MB, since that's the maximum item size allowed by the
8 * memcached protocol.
9 *
10 * $Id$
11 */
12#include "memcached.h"
13#include <sys/stat.h>
14#include <sys/socket.h>
15#include <sys/signal.h>
16#include <sys/resource.h>
17#include <fcntl.h>
18#include <netinet/in.h>
19#include <errno.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <string.h>
23#include <assert.h>
24
25#define POWER_SMALLEST 1
26#define POWER_LARGEST  200
27#define POWER_BLOCK 1048576
28#define CHUNK_ALIGN_BYTES (sizeof(void *))
29#define DONT_PREALLOC_SLABS
30
31/* powers-of-N allocation structures */
32
33typedef struct {
34    unsigned int size;      /* sizes of items */
35    unsigned int perslab;   /* how many items per slab */
36
37    void **slots;           /* list of item ptrs */
38    unsigned int sl_total;  /* size of previous array */
39    unsigned int sl_curr;   /* first free slot */
40
41    void *end_page_ptr;         /* pointer to next free item at end of page, or 0 */
42    unsigned int end_page_free; /* number of items remaining at end of last alloced page */
43
44    unsigned int slabs;     /* how many slabs were allocated for this class */
45
46    void **slab_list;       /* array of slab pointers */
47    unsigned int list_size; /* size of prev array */
48
49    unsigned int killing;  /* index+1 of dying slab, or zero if none */
50} slabclass_t;
51
52static slabclass_t slabclass[POWER_LARGEST + 1];
53static size_t mem_limit = 0;
54static size_t mem_malloced = 0;
55static int power_largest;
56
57/*
58 * Forward Declarations
59 */
60static int do_slabs_newslab(const unsigned int id);
61
62#ifndef DONT_PREALLOC_SLABS
63/* Preallocate as many slab pages as possible (called from slabs_init)
64   on start-up, so users don't get confused out-of-memory errors when
65   they do have free (in-slab) space, but no space to make new slabs.
66   if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
67   slab types can be made.  if max memory is less than 18 MB, only the
68   smaller ones will be made.  */
69static void slabs_preallocate (const unsigned int maxslabs);
70#endif
71
72/*
73 * Figures out which slab class (chunk size) is required to store an item of
74 * a given size.
75 *
76 * Given object size, return id to use when allocating/freeing memory for object
77 * 0 means error: can't store such a large object
78 */
79
80unsigned int slabs_clsid(const size_t size) {
81    int res = POWER_SMALLEST;
82
83    if (size == 0)
84        return 0;
85    while (size > slabclass[res].size)
86        if (res++ == power_largest)     /* won't fit in the biggest slab */
87            return 0;
88    return res;
89}
90
91/**
92 * Determines the chunk sizes and initializes the slab class descriptors
93 * accordingly.
94 */
95void slabs_init(const size_t limit, const double factor) {
96    int i = POWER_SMALLEST - 1;
97    unsigned int size = sizeof(item) + settings.chunk_size;
98
99    /* Factor of 2.0 means use the default memcached behavior */
100    if (factor == 2.0 && size < 128)
101        size = 128;
102
103    mem_limit = limit;
104    memset(slabclass, 0, sizeof(slabclass));
105
106    while (++i < POWER_LARGEST && size <= POWER_BLOCK / 2) {
107        /* Make sure items are always n-byte aligned */
108        if (size % CHUNK_ALIGN_BYTES)
109            size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
110
111        slabclass[i].size = size;
112        slabclass[i].perslab = POWER_BLOCK / slabclass[i].size;
113        size *= factor;
114        if (settings.verbose > 1) {
115            fprintf(stderr, "slab class %3d: chunk size %6u perslab %5u\n",
116                    i, slabclass[i].size, slabclass[i].perslab);
117        }
118    }
119
120    power_largest = i;
121    slabclass[power_largest].size = POWER_BLOCK;
122    slabclass[power_largest].perslab = 1;
123
124    /* for the test suite:  faking of how much we've already malloc'd */
125    {
126        char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
127        if (t_initial_malloc) {
128            mem_malloced = (size_t)atol(t_initial_malloc);
129        }
130
131    }
132
133#ifndef DONT_PREALLOC_SLABS
134    {
135        char *pre_alloc = getenv("T_MEMD_SLABS_ALLOC");
136
137        if (pre_alloc == NULL || atoi(pre_alloc) != 0) {
138            slabs_preallocate(power_largest);
139        }
140    }
141#endif
142}
143
144#ifndef DONT_PREALLOC_SLABS
145static void slabs_preallocate (const unsigned int maxslabs) {
146    int i;
147    unsigned int prealloc = 0;
148
149    /* pre-allocate a 1MB slab in every size class so people don't get
150       confused by non-intuitive "SERVER_ERROR out of memory"
151       messages.  this is the most common question on the mailing
152       list.  if you really don't want this, you can rebuild without
153       these three lines.  */
154
155    for (i = POWER_SMALLEST; i <= POWER_LARGEST; i++) {
156        if (++prealloc > maxslabs)
157            return;
158        do_slabs_newslab(i);
159    }
160
161}
162#endif
163
164static int grow_slab_list (const unsigned int id) {
165    slabclass_t *p = &slabclass[id];
166    if (p->slabs == p->list_size) {
167        size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;
168        void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
169        if (new_list == 0) return 0;
170        p->list_size = new_size;
171        p->slab_list = new_list;
172    }
173    return 1;
174}
175
176static int do_slabs_newslab(const unsigned int id) {
177    slabclass_t *p = &slabclass[id];
178#ifdef ALLOW_SLABS_REASSIGN
179    int len = POWER_BLOCK;
180#else
181    int len = p->size * p->perslab;
182#endif
183    char *ptr;
184
185    if (mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)
186        return 0;
187
188    if (grow_slab_list(id) == 0) return 0;
189
190    ptr = malloc((size_t)len);
191    if (ptr == 0) return 0;
192
193    memset(ptr, 0, (size_t)len);
194    p->end_page_ptr = ptr;
195    p->end_page_free = p->perslab;
196
197    p->slab_list[p->slabs++] = ptr;
198    mem_malloced += len;
199    return 1;
200}
201
202/*@null@*/
203void *do_slabs_alloc(const size_t size) {
204    slabclass_t *p;
205
206    unsigned int id = slabs_clsid(size);
207    if (id < POWER_SMALLEST || id > power_largest)
208        return NULL;
209
210    p = &slabclass[id];
211    assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);
212
213#ifdef USE_SYSTEM_MALLOC
214    if (mem_limit && mem_malloced + size > mem_limit)
215        return 0;
216    mem_malloced += size;
217    return malloc(size);
218#endif
219
220    /* fail unless we have space at the end of a recently allocated page,
221       we have something on our freelist, or we could allocate a new page */
222    if (! (p->end_page_ptr != 0 || p->sl_curr != 0 || do_slabs_newslab(id) != 0))
223        return 0;
224
225    /* return off our freelist, if we have one */
226    if (p->sl_curr != 0)
227        return p->slots[--p->sl_curr];
228
229    /* if we recently allocated a whole page, return from that */
230    if (p->end_page_ptr) {
231        void *ptr = p->end_page_ptr;
232        if (--p->end_page_free != 0) {
233            p->end_page_ptr += p->size;
234        } else {
235            p->end_page_ptr = 0;
236        }
237        return ptr;
238    }
239
240    return NULL;  /* shouldn't ever get here */
241}
242
243void do_slabs_free(void *ptr, const size_t size) {
244    unsigned char id = slabs_clsid(size);
245    slabclass_t *p;
246
247    assert(((item *)ptr)->slabs_clsid == 0);
248    assert(id >= POWER_SMALLEST && id <= power_largest);
249    if (id < POWER_SMALLEST || id > power_largest)
250        return;
251
252    p = &slabclass[id];
253
254#ifdef USE_SYSTEM_MALLOC
255    mem_malloced -= size;
256    free(ptr);
257    return;
258#endif
259
260    if (p->sl_curr == p->sl_total) { /* need more space on the free list */
261        int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16;  /* 16 is arbitrary */
262        void **new_slots = realloc(p->slots, new_size * sizeof(void *));
263        if (new_slots == 0)
264            return;
265        p->slots = new_slots;
266        p->sl_total = new_size;
267    }
268    p->slots[p->sl_curr++] = ptr;
269    return;
270}
271
272/*@null@*/
273char* do_slabs_stats(int *buflen) {
274    int i, total;
275    char *buf = (char *)malloc(power_largest * 200 + 100);
276    char *bufcurr = buf;
277
278    *buflen = 0;
279    if (buf == NULL) return NULL;
280
281    total = 0;
282    for(i = POWER_SMALLEST; i <= power_largest; i++) {
283        slabclass_t *p = &slabclass[i];
284        if (p->slabs != 0) {
285            unsigned int perslab, slabs;
286
287            slabs = p->slabs;
288            perslab = p->perslab;
289
290            bufcurr += sprintf(bufcurr, "STAT %d:chunk_size %u\r\n", i, p->size);
291            bufcurr += sprintf(bufcurr, "STAT %d:chunks_per_page %u\r\n", i, perslab);
292            bufcurr += sprintf(bufcurr, "STAT %d:total_pages %u\r\n", i, slabs);
293            bufcurr += sprintf(bufcurr, "STAT %d:total_chunks %u\r\n", i, slabs*perslab);
294            bufcurr += sprintf(bufcurr, "STAT %d:used_chunks %u\r\n", i, slabs*perslab - p->sl_curr);
295            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks %u\r\n", i, p->sl_curr);
296            bufcurr += sprintf(bufcurr, "STAT %d:free_chunks_end %u\r\n", i, p->end_page_free);
297            total++;
298        }
299    }
300    bufcurr += sprintf(bufcurr, "STAT active_slabs %d\r\nSTAT total_malloced %llu\r\n", total, (unsigned long long)mem_malloced);
301    bufcurr += sprintf(bufcurr, "END\r\n");
302    *buflen = bufcurr - buf;
303    return buf;
304}
305
306#ifdef ALLOW_SLABS_REASSIGN
307/* Blows away all the items in a slab class and moves its slabs to another
308   class. This is only used by the "slabs reassign" command, for manual tweaking
309   of memory allocation. It's disabled by default since it requires that all
310   slabs be the same size (which can waste space for chunk size mantissas of
311   other than 2.0).
312   1 = success
313   0 = fail
314   -1 = tried. busy. send again shortly. */
315int do_slabs_reassign(unsigned char srcid, unsigned char dstid) {
316    void *slab, *slab_end;
317    slabclass_t *p, *dp;
318    void *iter;
319    bool was_busy = false;
320
321    if (srcid < POWER_SMALLEST || srcid > power_largest ||
322        dstid < POWER_SMALLEST || dstid > power_largest)
323        return 0;
324
325    p = &slabclass[srcid];
326    dp = &slabclass[dstid];
327
328    /* fail if src still populating, or no slab to give up in src */
329    if (p->end_page_ptr || ! p->slabs)
330        return 0;
331
332    /* fail if dst is still growing or we can't make room to hold its new one */
333    if (dp->end_page_ptr || ! grow_slab_list(dstid))
334        return 0;
335
336    if (p->killing == 0) p->killing = 1;
337
338    slab = p->slab_list[p->killing - 1];
339    slab_end = (char*)slab + POWER_BLOCK;
340
341    for (iter = slab; iter < slab_end; (char*)iter += p->size) {
342        item *it = (item *)iter;
343        if (it->slabs_clsid) {
344            if (it->refcount) was_busy = true;
345            item_unlink(it);
346        }
347    }
348
349    /* go through free list and discard items that are no longer part of this slab */
350    {
351        int fi;
352        for (fi = p->sl_curr - 1; fi >= 0; fi--) {
353            if (p->slots[fi] >= slab && p->slots[fi] < slab_end) {
354                p->sl_curr--;
355                if (p->sl_curr > fi) p->slots[fi] = p->slots[p->sl_curr];
356            }
357        }
358    }
359
360    if (was_busy) return -1;
361
362    /* if good, now move it to the dst slab class */
363    p->slab_list[p->killing - 1] = p->slab_list[p->slabs - 1];
364    p->slabs--;
365    p->killing = 0;
366    dp->slab_list[dp->slabs++] = slab;
367    dp->end_page_ptr = slab;
368    dp->end_page_free = dp->perslab;
369    /* this isn't too critical, but other parts of the code do asserts to
370       make sure this field is always 0.  */
371    for (iter = slab; iter < slab_end; (char*)iter += dp->size) {
372        ((item *)iter)->slabs_clsid = 0;
373    }
374    return 1;
375}
376#endif
Note: See TracBrowser for help on using the browser.