root/branches/binary/server/thread.c @ 745

Revision 745, 15.9 kB (checked in by dsallings, 21 months ago)

Merged commit 'trunk' into lbinary as of r744

Line 
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 * Thread management for memcached.
4 *
5 *  $Id$
6 */
7#include "memcached.h"
8#include <stdio.h>
9#include <errno.h>
10#include <stdlib.h>
11#include <errno.h>
12
13#ifdef HAVE_MALLOC_H
14#include <malloc.h>
15#endif
16
17#ifdef HAVE_STRING_H
18#include <string.h>
19#endif
20
21#ifdef USE_THREADS
22
23#include <pthread.h>
24
25#define ITEMS_PER_ALLOC 64
26
/*
 * One pending connection waiting in a worker's queue.  The fields mirror
 * the arguments later handed to conn_new() by the worker that dequeues it.
 */
struct conn_queue_item {
    int sfd;                        /* socket file descriptor */
    int init_state;                 /* initial state passed to conn_new() */
    int event_flags;                /* event flags passed to conn_new() */
    int read_buffer_size;           /* read buffer size passed to conn_new() */
    int protocol;                   /* protocol id (tested with IS_UDP()) */
    struct conn_queue_item *next;   /* next item in a queue or freelist */
};
typedef struct conn_queue_item CQ_ITEM;
37
38/* A connection queue. */
39typedef struct conn_queue CQ;
40struct conn_queue {
41    CQ_ITEM *head;
42    CQ_ITEM *tail;
43    pthread_mutex_t lock;
44    pthread_cond_t  cond;
45};
46
47/* Lock for connection freelist */
48static pthread_mutex_t conn_lock;
49
50/* Lock for alternative item suffix freelist */
51static pthread_mutex_t suffix_lock;
52
53/* Lock for cache operations (item_*, assoc_*) */
54static pthread_mutex_t cache_lock;
55
56/* Lock for slab allocator operations */
57static pthread_mutex_t slabs_lock;
58
59/* Lock for global stats */
60static pthread_mutex_t stats_lock;
61
62/* Free list of CQ_ITEM structs */
63static CQ_ITEM *cqi_freelist;
64static pthread_mutex_t cqi_freelist_lock;
65
66/*
67 * Each libevent instance has a wakeup pipe, which other threads
68 * can use to signal that they've put a new connection on its queue.
69 */
70typedef struct {
71    pthread_t thread_id;        /* unique ID of this thread */
72    struct event_base *base;    /* libevent handle this thread uses */
73    struct event notify_event;  /* listen event for notify pipe */
74    int notify_receive_fd;      /* receiving end of notify pipe */
75    int notify_send_fd;         /* sending end of notify pipe */
76    CQ  new_conn_queue;         /* queue of new connections to handle */
77} LIBEVENT_THREAD;
78
79static LIBEVENT_THREAD *threads;
80
81/*
82 * Number of threads that have finished setting themselves up.
83 */
84static int init_count = 0;
85static pthread_mutex_t init_lock;
86static pthread_cond_t init_cond;
87
88
89static void thread_libevent_process(int fd, short which, void *arg);
90
91/*
92 * Initializes a connection queue.
93 */
94static void cq_init(CQ *cq) {
95    pthread_mutex_init(&cq->lock, NULL);
96    pthread_cond_init(&cq->cond, NULL);
97    cq->head = NULL;
98    cq->tail = NULL;
99}
100
101/*
102 * Waits for work on a connection queue.
103 */
104static CQ_ITEM *cq_pop(CQ *cq) {
105    CQ_ITEM *item;
106
107    pthread_mutex_lock(&cq->lock);
108    while (NULL == cq->head)
109        pthread_cond_wait(&cq->cond, &cq->lock);
110    item = cq->head;
111    cq->head = item->next;
112    if (NULL == cq->head)
113        cq->tail = NULL;
114    pthread_mutex_unlock(&cq->lock);
115
116    return item;
117}
118
119/*
120 * Looks for an item on a connection queue, but doesn't block if there isn't
121 * one.
122 * Returns the item, or NULL if no item is available
123 */
124static CQ_ITEM *cq_peek(CQ *cq) {
125    CQ_ITEM *item;
126
127    pthread_mutex_lock(&cq->lock);
128    item = cq->head;
129    if (NULL != item) {
130        cq->head = item->next;
131        if (NULL == cq->head)
132            cq->tail = NULL;
133    }
134    pthread_mutex_unlock(&cq->lock);
135
136    return item;
137}
138
139/*
140 * Adds an item to a connection queue.
141 */
142static void cq_push(CQ *cq, CQ_ITEM *item) {
143    item->next = NULL;
144
145    pthread_mutex_lock(&cq->lock);
146    if (NULL == cq->tail)
147        cq->head = item;
148    else
149        cq->tail->next = item;
150    cq->tail = item;
151    pthread_cond_signal(&cq->cond);
152    pthread_mutex_unlock(&cq->lock);
153}
154
155/*
156 * Returns a fresh connection queue item.
157 */
158static CQ_ITEM *cqi_new() {
159    CQ_ITEM *item = NULL;
160    pthread_mutex_lock(&cqi_freelist_lock);
161    if (cqi_freelist) {
162        item = cqi_freelist;
163        cqi_freelist = item->next;
164    }
165    pthread_mutex_unlock(&cqi_freelist_lock);
166
167    if (NULL == item) {
168        int i;
169
170        /* Allocate a bunch of items at once to reduce fragmentation */
171        item = malloc(sizeof(CQ_ITEM) * ITEMS_PER_ALLOC);
172        if (NULL == item)
173            return NULL;
174
175        /*
176         * Link together all the new items except the first one
177         * (which we'll return to the caller) for placement on
178         * the freelist.
179         */
180        for (i = 2; i < ITEMS_PER_ALLOC; i++)
181            item[i - 1].next = &item[i];
182
183        pthread_mutex_lock(&cqi_freelist_lock);
184        item[ITEMS_PER_ALLOC - 1].next = cqi_freelist;
185        cqi_freelist = &item[1];
186        pthread_mutex_unlock(&cqi_freelist_lock);
187    }
188
189    return item;
190}
191
192
193/*
194 * Frees a connection queue item (adds it to the freelist.)
195 */
196static void cqi_free(CQ_ITEM *item) {
197    pthread_mutex_lock(&cqi_freelist_lock);
198    item->next = cqi_freelist;
199    cqi_freelist = item;
200    pthread_mutex_unlock(&cqi_freelist_lock);
201}
202
203
/*
 * Creates a worker thread.
 *
 * func  Entry point the new thread runs
 * arg   Opaque argument handed to func
 *
 * The thread is created with default attributes and its pthread_t is not
 * retained.  If the thread cannot be created, an error is printed to
 * stderr and the process exits with status 1.
 */
static void create_worker(void *(*func)(void *), void *arg) {
    pthread_t       thread;
    pthread_attr_t  attr;
    int             ret;

    pthread_attr_init(&attr);

    ret = pthread_create(&thread, &attr, func, arg);

    /*
     * The attribute object is only needed for the pthread_create() call;
     * destroy it so an initialized pthread_attr_t is not left behind.
     */
    pthread_attr_destroy(&attr);

    if (ret != 0) {
        /* pthread_create() returns the error number instead of setting errno. */
        fprintf(stderr, "Can't create thread: %s\n",
                strerror(ret));
        exit(1);
    }
}
220
221
222/*
223 * Pulls a conn structure from the freelist, if one is available.
224 */
225conn *mt_conn_from_freelist() {
226    conn *c;
227
228    pthread_mutex_lock(&conn_lock);
229    c = do_conn_from_freelist();
230    pthread_mutex_unlock(&conn_lock);
231
232    return c;
233}
234
235
236/*
237 * Adds a conn structure to the freelist.
238 *
239 * Returns 0 on success, 1 if the structure couldn't be added.
240 */
241bool mt_conn_add_to_freelist(conn *c) {
242    bool result;
243
244    pthread_mutex_lock(&conn_lock);
245    result = do_conn_add_to_freelist(c);
246    pthread_mutex_unlock(&conn_lock);
247
248    return result;
249}
250
251/*
252 * Pulls a suffix buffer from the freelist, if one is available.
253 */
254char *mt_suffix_from_freelist() {
255    char *s;
256
257    pthread_mutex_lock(&suffix_lock);
258    s = do_suffix_from_freelist();
259    pthread_mutex_unlock(&suffix_lock);
260
261    return s;
262}
263
264
265/*
266 * Adds a suffix buffer to the freelist.
267 *
268 * Returns 0 on success, 1 if the buffer couldn't be added.
269 */
270bool mt_suffix_add_to_freelist(char *s) {
271    bool result;
272
273    pthread_mutex_lock(&suffix_lock);
274    result = do_suffix_add_to_freelist(s);
275    pthread_mutex_unlock(&suffix_lock);
276
277    return result;
278}
279
280
281/****************************** LIBEVENT THREADS *****************************/
282
283/*
284 * Set up a thread's information.
285 */
286static void setup_thread(LIBEVENT_THREAD *me) {
287    if (! me->base) {
288        me->base = event_init();
289        if (! me->base) {
290            fprintf(stderr, "Can't allocate event base\n");
291            exit(1);
292        }
293    }
294
295    /* Listen for notifications from other threads */
296    event_set(&me->notify_event, me->notify_receive_fd,
297              EV_READ | EV_PERSIST, thread_libevent_process, me);
298    event_base_set(me->base, &me->notify_event);
299
300    if (event_add(&me->notify_event, 0) == -1) {
301        fprintf(stderr, "Can't monitor libevent notify pipe\n");
302        exit(1);
303    }
304
305    cq_init(&me->new_conn_queue);
306}
307
308
309/*
310 * Worker thread: main event loop
311 */
312static void *worker_libevent(void *arg) {
313    LIBEVENT_THREAD *me = arg;
314
315    /* Any per-thread setup can happen here; thread_init() will block until
316     * all threads have finished initializing.
317     */
318
319    pthread_mutex_lock(&init_lock);
320    init_count++;
321    pthread_cond_signal(&init_cond);
322    pthread_mutex_unlock(&init_lock);
323
324    return (void*) event_base_loop(me->base, 0);
325}
326
327
328/*
329 * Processes an incoming "handle a new connection" item. This is called when
330 * input arrives on the libevent wakeup pipe.
331 */
332static void thread_libevent_process(int fd, short which, void *arg) {
333    LIBEVENT_THREAD *me = arg;
334    CQ_ITEM *item;
335    char buf[1];
336
337    if (read(fd, buf, 1) != 1)
338        if (settings.verbose > 0)
339            fprintf(stderr, "Can't read from libevent pipe\n");
340
341    item = cq_peek(&me->new_conn_queue);
342
343    if (NULL != item) {
344        conn *c = conn_new(item->sfd, item->init_state, item->event_flags,
345                           item->read_buffer_size, item->protocol, me->base);
346        if (c == NULL) {
347            if (IS_UDP(item->protocol)) {
348                fprintf(stderr, "Can't listen for events on UDP socket\n");
349                exit(1);
350            } else {
351                if (settings.verbose > 0) {
352                    fprintf(stderr, "Can't listen for events on fd %d\n",
353                        item->sfd);
354                }
355                close(item->sfd);
356            }
357        }
358        cqi_free(item);
359    }
360}
361
362/* Which thread we assigned a connection to most recently. */
363static int last_thread = -1;
364
365/*
366 * Dispatches a new connection to another thread. This is only ever called
367 * from the main thread, either during initialization (for UDP) or because
368 * of an incoming connection.
369 */
370void dispatch_conn_new(int sfd, int init_state, int event_flags,
371                       int read_buffer_size, int prot) {
372    CQ_ITEM *item = cqi_new();
373    int thread = (last_thread + 1) % settings.num_threads;
374
375    last_thread = thread;
376
377    item->sfd = sfd;
378    item->init_state = init_state;
379    item->event_flags = event_flags;
380    item->read_buffer_size = read_buffer_size;
381    item->protocol = prot;
382
383    cq_push(&threads[thread].new_conn_queue, item);
384    if (write(threads[thread].notify_send_fd, "", 1) != 1) {
385        perror("Writing to thread notify pipe");
386    }
387}
388
389/*
390 * Returns true if this is the thread that listens for new TCP connections.
391 */
392int mt_is_listen_thread() {
393    return pthread_self() == threads[0].thread_id;
394}
395
396/********************************* ITEM ACCESS *******************************/
397
398/*
399 * Walks through the list of deletes that have been deferred because the items
400 * were locked down at the tmie.
401 */
402void mt_run_deferred_deletes() {
403    pthread_mutex_lock(&cache_lock);
404    do_run_deferred_deletes();
405    pthread_mutex_unlock(&cache_lock);
406}
407
408/*
409 * Allocates a new item.
410 */
411item *mt_item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
412    item *it;
413    pthread_mutex_lock(&cache_lock);
414    it = do_item_alloc(key, nkey, flags, exptime, nbytes);
415    pthread_mutex_unlock(&cache_lock);
416    return it;
417}
418
419/*
420 * Returns an item if it hasn't been marked as expired or deleted,
421 * lazy-expiring as needed.
422 */
423item *mt_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked) {
424    item *it;
425    pthread_mutex_lock(&cache_lock);
426    it = do_item_get_notedeleted(key, nkey, delete_locked);
427    pthread_mutex_unlock(&cache_lock);
428    return it;
429}
430
431/*
432 * Links an item into the LRU and hashtable.
433 */
434int mt_item_link(item *item) {
435    int ret;
436
437    pthread_mutex_lock(&cache_lock);
438    ret = do_item_link(item);
439    pthread_mutex_unlock(&cache_lock);
440    return ret;
441}
442
443/*
444 * Decrements the reference count on an item and adds it to the freelist if
445 * needed.
446 */
447void mt_item_remove(item *item) {
448    pthread_mutex_lock(&cache_lock);
449    do_item_remove(item);
450    pthread_mutex_unlock(&cache_lock);
451}
452
453/*
454 * Replaces one item with another in the hashtable.
455 */
456int mt_item_replace(item *old, item *new) {
457    int ret;
458
459    pthread_mutex_lock(&cache_lock);
460    ret = do_item_replace(old, new);
461    pthread_mutex_unlock(&cache_lock);
462    return ret;
463}
464
465/*
466 * Unlinks an item from the LRU and hashtable.
467 */
468void mt_item_unlink(item *item) {
469    pthread_mutex_lock(&cache_lock);
470    do_item_unlink(item);
471    pthread_mutex_unlock(&cache_lock);
472}
473
474/*
475 * Moves an item to the back of the LRU queue.
476 */
477void mt_item_update(item *item) {
478    pthread_mutex_lock(&cache_lock);
479    do_item_update(item);
480    pthread_mutex_unlock(&cache_lock);
481}
482
483/*
484 * Adds an item to the deferred-delete list so it can be reaped later.
485 */
486char *mt_defer_delete(item *item, time_t exptime) {
487    char *ret;
488
489    pthread_mutex_lock(&cache_lock);
490    ret = do_defer_delete(item, exptime);
491    pthread_mutex_unlock(&cache_lock);
492    return ret;
493}
494
495/*
496 * Does arithmetic on a numeric item value.
497 */
498char *mt_add_delta(item *item, int incr, const int64_t delta, char *buf) {
499    char *ret;
500
501    pthread_mutex_lock(&cache_lock);
502    ret = do_add_delta(item, incr, delta, buf);
503    pthread_mutex_unlock(&cache_lock);
504    return ret;
505}
506
507/*
508 * Stores an item in the cache (high level, obeys set/add/replace semantics)
509 */
510int mt_store_item(item *item, int comm) {
511    int ret;
512
513    pthread_mutex_lock(&cache_lock);
514    ret = do_store_item(item, comm);
515    pthread_mutex_unlock(&cache_lock);
516    return ret;
517}
518
519/*
520 * Flushes expired items after a flush_all call
521 */
522void mt_item_flush_expired() {
523    pthread_mutex_lock(&cache_lock);
524    do_item_flush_expired();
525    pthread_mutex_unlock(&cache_lock);
526}
527
528/*
529 * Dumps part of the cache
530 */
531char *mt_item_cachedump(unsigned int slabs_clsid, unsigned int limit, unsigned int *bytes) {
532    char *ret;
533
534    pthread_mutex_lock(&cache_lock);
535    ret = do_item_cachedump(slabs_clsid, limit, bytes);
536    pthread_mutex_unlock(&cache_lock);
537    return ret;
538}
539
540/*
541 * Dumps statistics about slab classes
542 */
543char *mt_item_stats(int *bytes) {
544    char *ret;
545
546    pthread_mutex_lock(&cache_lock);
547    ret = do_item_stats(bytes);
548    pthread_mutex_unlock(&cache_lock);
549    return ret;
550}
551
552/*
553 * Dumps a list of objects of each size in 32-byte increments
554 */
555char *mt_item_stats_sizes(int *bytes) {
556    char *ret;
557
558    pthread_mutex_lock(&cache_lock);
559    ret = do_item_stats_sizes(bytes);
560    pthread_mutex_unlock(&cache_lock);
561    return ret;
562}
563
564/****************************** HASHTABLE MODULE *****************************/
565
566void mt_assoc_move_next_bucket() {
567    pthread_mutex_lock(&cache_lock);
568    do_assoc_move_next_bucket();
569    pthread_mutex_unlock(&cache_lock);
570}
571
572/******************************* SLAB ALLOCATOR ******************************/
573
574void *mt_slabs_alloc(size_t size, unsigned int id) {
575    void *ret;
576
577    pthread_mutex_lock(&slabs_lock);
578    ret = do_slabs_alloc(size, id);
579    pthread_mutex_unlock(&slabs_lock);
580    return ret;
581}
582
583void mt_slabs_free(void *ptr, size_t size, unsigned int id) {
584    pthread_mutex_lock(&slabs_lock);
585    do_slabs_free(ptr, size, id);
586    pthread_mutex_unlock(&slabs_lock);
587}
588
589char *mt_slabs_stats(int *buflen) {
590    char *ret;
591
592    pthread_mutex_lock(&slabs_lock);
593    ret = do_slabs_stats(buflen);
594    pthread_mutex_unlock(&slabs_lock);
595    return ret;
596}
597
598#ifdef ALLOW_SLABS_REASSIGN
599int mt_slabs_reassign(unsigned char srcid, unsigned char dstid) {
600    int ret;
601
602    pthread_mutex_lock(&slabs_lock);
603    ret = do_slabs_reassign(srcid, dstid);
604    pthread_mutex_unlock(&slabs_lock);
605    return ret;
606}
607#endif
608
609/******************************* GLOBAL STATS ******************************/
610
611void mt_stats_lock() {
612    pthread_mutex_lock(&stats_lock);
613}
614
615void mt_stats_unlock() {
616    pthread_mutex_unlock(&stats_lock);
617}
618
619/*
620 * Initializes the thread subsystem, creating various worker threads.
621 *
622 * nthreads  Number of event handler threads to spawn
623 * main_base Event base for main thread
624 */
625void thread_init(int nthreads, struct event_base *main_base) {
626    int         i;
627
628    pthread_mutex_init(&cache_lock, NULL);
629    pthread_mutex_init(&conn_lock, NULL);
630    pthread_mutex_init(&slabs_lock, NULL);
631    pthread_mutex_init(&stats_lock, NULL);
632
633    pthread_mutex_init(&init_lock, NULL);
634    pthread_cond_init(&init_cond, NULL);
635
636    pthread_mutex_init(&cqi_freelist_lock, NULL);
637    cqi_freelist = NULL;
638
639    threads = malloc(sizeof(LIBEVENT_THREAD) * nthreads);
640    if (! threads) {
641        perror("Can't allocate thread descriptors");
642        exit(1);
643    }
644
645    threads[0].base = main_base;
646    threads[0].thread_id = pthread_self();
647
648    for (i = 0; i < nthreads; i++) {
649        int fds[2];
650        if (pipe(fds)) {
651            perror("Can't create notify pipe");
652            exit(1);
653        }
654
655        threads[i].notify_receive_fd = fds[0];
656        threads[i].notify_send_fd = fds[1];
657
658    setup_thread(&threads[i]);
659    }
660
661    /* Create threads after we've done all the libevent setup. */
662    for (i = 1; i < nthreads; i++) {
663        create_worker(worker_libevent, &threads[i]);
664    }
665
666    /* Wait for all the threads to set themselves up before returning. */
667    pthread_mutex_lock(&init_lock);
668    init_count++; /* main thread */
669    while (init_count < nthreads) {
670        pthread_cond_wait(&init_cond, &init_lock);
671    }
672    pthread_mutex_unlock(&init_lock);
673}
674
675#endif
Note: See TracBrowser for help on using the browser.