root/trunk/server/memcached.h @ 738

Revision 738, 13.9 kB (checked in by dormando, 21 months ago)

Don't re-calculate the slab class id.
slabs_alloc() internally calls slabs_clsid(), so an eviction case would crawl the list of slab classes three times.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* $Id$ */
3
4#ifdef HAVE_CONFIG_H
5#include "config.h"
6#endif
7
8#include <sys/types.h>
9#include <sys/socket.h>
10#include <sys/time.h>
11#include <netinet/in.h>
12#include <event.h>
13#include <netdb.h>
14
15#define DATA_BUFFER_SIZE 2048        /* NOTE(review): the *_SIZE values here are presumably byte counts -- confirm at their users */
16#define UDP_READ_BUFFER_SIZE 65536
17#define UDP_MAX_PAYLOAD_SIZE 1400
18#define UDP_HEADER_SIZE 8             /* NOTE(review): presumably the size of one header in conn.hdrbuf -- confirm */
19#define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
20/* The longest 64-bit number converted to a decimal string is 20 bytes.
21 * Add a few more for spaces, \r\n and \0. */
22#define SUFFIX_SIZE 24
23
24/** Initial size of list of items being returned by "get". */
25#define ITEM_LIST_INITIAL 200
26
27/** Initial size of list of CAS suffixes appended to "gets" lines. */
28#define SUFFIX_LIST_INITIAL 20
29
30/** Initial size of the sendmsg() scatter/gather array. */
31#define IOV_LIST_INITIAL 400
32
33/** Initial number of sendmsg() argument structures to allocate. */
34#define MSG_LIST_INITIAL 10
35
36/** High water marks for buffer shrinking; each *_LIST_HIGHWAT is above the matching *_LIST_INITIAL. */
37#define READ_BUFFER_HIGHWAT 8192
38#define ITEM_LIST_HIGHWAT 400
39#define IOV_LIST_HIGHWAT 600
40#define MSG_LIST_HIGHWAT 100
41
42/* Get a consistent bool type: <stdbool.h> when HAVE_STDBOOL_H is set, otherwise a local enum. */
43#if HAVE_STDBOOL_H
44# include <stdbool.h>
45#else
46  typedef enum {false = 0, true = 1} bool;
47#endif
48
49#if HAVE_STDINT_H
50# include <stdint.h>
51#else
52 typedef unsigned char             uint8_t;   /* NOTE(review): only uint8_t gets a fallback; uint64_t and int64_t are used later in this header */
53#endif
54
55/* Pull in <unistd.h> when HAVE_UNISTD_H is set. */
56#if HAVE_UNISTD_H
57# include <unistd.h>
58#endif
59
60/** Time relative to server start. Smaller than time_t on 64-bit systems. */
61typedef unsigned int rel_time_t;
62
63struct stats {                    /* type of the global 'stats' object declared further down */
64    unsigned int  curr_items;     /* NOTE(review): by name, curr_* are current values and total_* running totals -- confirm */
65    unsigned int  total_items;
66    uint64_t      curr_bytes;
67    unsigned int  curr_conns;
68    unsigned int  total_conns;
69    unsigned int  conn_structs;   /* NOTE(review): presumably the number of struct conn objects allocated -- confirm */
70    uint64_t      get_cmds;       /* the command, hit/miss and eviction counters are 64-bit, unlike the unsigned int fields above */
71    uint64_t      set_cmds;
72    uint64_t      get_hits;
73    uint64_t      get_misses;
74    uint64_t      evictions;
75    time_t        started;          /* when the process was started */
76    uint64_t      bytes_read;
77    uint64_t      bytes_written;
78};
79
80#define MAX_VERBOSITY_LEVEL 2   /* NOTE(review): presumably the upper bound for settings.verbose -- confirm */
81
82struct settings {                /* type of the global 'settings' object declared below */
83    size_t maxbytes;
84    int maxconns;
85    int port;
86    int udpport;
87    char *inter;        /* NOTE(review): presumably the interface/address to listen on -- confirm */
88    int verbose;
89    rel_time_t oldest_live; /* ignore existing items older than this */
90    bool managed;          /* if true, a tracker manages virtual buckets */
91    int evict_to_free;
92    char *socketpath;   /* path to unix socket if using local socket */
93    int access;  /* access mask (a la chmod) for unix domain socket */
94    double factor;          /* chunk size growth factor */
95    int chunk_size;
96    int num_threads;        /* number of libevent threads to run */
97    char prefix_delimiter;  /* character that marks a key prefix (for stats) */
98    int detail_enabled;     /* nonzero if we're collecting detailed stats */
99};
100
101extern struct stats stats;         /* declaration only; the definition is not in this header */
102extern struct settings settings;   /* declaration only; the definition is not in this header */
103
104#define ITEM_LINKED 1    /* ITEM_* are values for item.it_flags; 1, 2 and 4 are distinct bits */
105#define ITEM_DELETED 2
106
107/* temp */
108#define ITEM_SLABBED 4
109
110typedef struct _stritem {
111    struct _stritem *next;
112    struct _stritem *prev;      /* NOTE(review): next/prev presumably link the item into a per-class list -- confirm in items.c */
113    struct _stritem *h_next;    /* hash chain next */
114    rel_time_t      time;       /* least recent access */
115    rel_time_t      exptime;    /* expire time */
116    int             nbytes;     /* size of data */
117    unsigned short  refcount;
118    uint8_t         nsuffix;    /* length of flags-and-length string */
119    uint8_t         it_flags;   /* ITEM_* above */
120    uint8_t         slabs_clsid;/* which slab class we're in */
121    uint8_t         nkey;       /* key length, w/terminating null and padding; NOTE(review): the ITEM_* macros add 1 to nkey, which suggests the terminator is not counted here -- confirm */
122    uint64_t        cas_id;     /* the CAS identifier */
123    void * end[];               /* flexible array member; ITEM_key() uses the address of end[0] as the start of the key */
124    /* then null-terminated key */
125    /* then " flags length\r\n" (no terminating null) */
126    /* then data with terminating \r\n (no terminating null; it's binary!) */
127} item;
128
129#define ITEM_key(item) ((char*)&((item)->end[0]))   /* key starts at the address of end[0] */
130
131/* warning: don't pass an expression with side effects (e.g. a function call) to these macros; the argument is evaluated more than once */
132#define ITEM_suffix(item) ((char*) &((item)->end[0]) + (item)->nkey + 1)   /* key start + nkey + 1 */
133#define ITEM_data(item) ((char*) &((item)->end[0]) + (item)->nkey + 1 + (item)->nsuffix)   /* suffix start + nsuffix */
134#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 + (item)->nsuffix + (item)->nbytes)   /* header + key + 1 + suffix + data */
135
136enum conn_states {
137    conn_listening,  /**< the socket which listens for connections */
138    conn_read,       /**< reading in a command line */
139    conn_write,      /**< writing out a simple response */
140    conn_nread,      /**< reading in a fixed number of bytes */
141    conn_swallow,    /**< swallowing unnecessary bytes w/o storing */
142    conn_closing,    /**< closing this connection */
143    conn_mwrite,     /**< writing out many items sequentially */
144};
145
146#define NREAD_ADD 1       /* NOTE(review): NREAD_* are presumably the values kept in conn.item_comm and passed as 'comm' to store_item() -- confirm */
147#define NREAD_SET 2
148#define NREAD_REPLACE 3
149#define NREAD_APPEND 4
150#define NREAD_PREPEND 5
151#define NREAD_CAS 6
152
153typedef struct conn conn;
154struct conn {
155    int    sfd;     /**< NOTE(review): presumably the socket descriptor given to conn_new() -- confirm */
156    int    state;   /**< NOTE(review): presumably an enum conn_states value, stored as int -- confirm */
157    struct event event;   /**< libevent event; the single-threaded dispatch_event_add() passes &c->event to event_add() */
158    short  ev_flags;
159    short  which;   /**< which events were just triggered */
160
161    char   *rbuf;   /**< buffer to read commands into */
162    char   *rcurr;  /**< but if we parsed some already, this is where we stopped */
163    int    rsize;   /**< total allocated size of rbuf */
164    int    rbytes;  /**< how much data, starting from rcurr, do we have unparsed */
165
166    char   *wbuf;   /**< NOTE(review): wbuf/wcurr/wsize/wbytes look like the write-side twins of the r* fields above -- confirm */
167    char   *wcurr;
168    int    wsize;
169    int    wbytes;
170    int    write_and_go; /**< which state to go into after finishing current write */
171    void   *write_and_free; /**< free this memory after finishing writing */
172
173    char   *ritem;  /**< when we read in an item's value, it goes here */
174    int    rlbytes; /**< NOTE(review): presumably the bytes of the value still to be read into ritem -- confirm */
175
176    /* data for the nread state */
177
178    /**
179     * item is used to hold an item structure created after reading the command
180     * line of set/add/replace commands, but before we finished reading the actual
181     * data. The data is read into ITEM_data(item) to avoid extra copying.
182     */
183
184    void   *item;     /* for commands set/add/replace  */
185    int    item_comm; /* which one is it: set/add/replace */
186
187    /* data for the swallow state */
188    int    sbytes;    /* how many bytes to swallow */
189
190    /* data for the mwrite state */
191    struct iovec *iov;
192    int    iovsize;   /* number of elements allocated in iov[] */
193    int    iovused;   /* number of elements used in iov[] */
194
195    struct msghdr *msglist;
196    int    msgsize;   /* number of elements allocated in msglist[] */
197    int    msgused;   /* number of elements used in msglist[] */
198    int    msgcurr;   /* element in msglist[] being transmitted now */
199    int    msgbytes;  /* number of bytes in current msg */
200
201    item   **ilist;   /* list of items to write out */
202    int    isize;     /* NOTE(review): isize/icurr/ileft presumably are capacity, cursor and remaining count for ilist -- confirm */
203    item   **icurr;
204    int    ileft;
205
206    char   **suffixlist;  /* NOTE(review): the suffix* fields mirror the ilist group by name; presumably the CAS suffixes for "gets" -- confirm */
207    int    suffixsize;
208    char   **suffixcurr;
209    int    suffixleft;
210
211    /* data for UDP clients */
212    bool   udp;       /* is this a UDP "connection" */
213    int    request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
214    struct sockaddr request_addr; /* Who sent the most recent request */
215    socklen_t request_addr_size;
216    unsigned char *hdrbuf; /* udp packet headers */
217    int    hdrsize;   /* number of headers' worth of space allocated */
218
219    int    binary;    /* are we in binary mode */
220    int    bucket;    /* bucket number for the next command, if running as
221                         a managed instance. -1 (_not_ 0) means invalid. */
222    int    gen;       /* generation requested for the bucket */
223    bool   noreply;   /* True if the reply should not be sent. */
224    conn   *next;     /* Used for generating a list of conn structures */
225};
226
227/* number of virtual buckets for a managed instance */
228#define MAX_BUCKETS 32768
229
230/* current time (updated periodically); typed rel_time_t, i.e. relative to server start */
231extern volatile rel_time_t current_time;
232
233/*
234 * Functions. The "do_" variants are what the locking "mt_" wrappers call in threaded builds (see the threading comment below).
235 */
236
237conn *do_conn_from_freelist(void);      /* (void), not (): an empty list declares no prototype, so stray arguments would go unchecked */
238bool do_conn_add_to_freelist(conn *c);
239char *do_suffix_from_freelist(void);    /* (void) for the same reason; matches mt_suffix_from_freelist(void) */
240bool do_suffix_add_to_freelist(char *s);
241char *do_defer_delete(item *item, time_t exptime);
242void do_run_deferred_deletes(void);
243char *do_add_delta(item *item, const bool incr, const int64_t delta, char *buf);
244int do_store_item(item *item, int comm);
245conn *conn_new(const int sfd, const int init_state, const int event_flags, const int read_buffer_size, const bool is_udp, struct event_base *base);
246
247
248#include "stats.h"
249#include "slabs.h"
250#include "assoc.h"
251#include "items.h"
252
253
254/*
255 * In multithreaded mode, we wrap certain functions with lock management and
256 * replace the logic of some other functions. All wrapped functions have
257 * "mt_" and "do_" variants. In multithreaded mode, the plain version of a
258 * function is #define-d to the "mt_" variant, which often just grabs a
259 * lock and calls the "do_" function. In singlethreaded mode, the "do_"
260 * function is called directly.
261 *
262 * Functions such as the libevent-related calls that need to do cross-thread
263 * communication in multithreaded mode (rather than actually doing the work
264 * in the current thread) are called via "dispatch_" frontends, which are
265 * also #define-d to directly call the underlying code in singlethreaded mode.
266 */
267#ifdef USE_THREADS
268
269void thread_init(int nthreads, struct event_base *main_base);
270int  dispatch_event_add(int thread, conn *c);
271void dispatch_conn_new(int sfd, int init_state, int event_flags, int read_buffer_size, int is_udp);   /* NOTE(review): is_udp is int here but bool in conn_new() */
272
273/* Lock wrappers for cache functions that are called from main loop. */
274char *mt_add_delta(item *item, const int incr, const int64_t delta, char *buf);   /* NOTE(review): incr is int here but bool in do_add_delta() */
275void mt_assoc_move_next_bucket(void);
276conn *mt_conn_from_freelist(void);
277bool  mt_conn_add_to_freelist(conn *c);
278char *mt_suffix_from_freelist(void);
279bool  mt_suffix_add_to_freelist(char *s);
280char *mt_defer_delete(item *it, time_t exptime);
281int   mt_is_listen_thread(void);
282item *mt_item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes);
283char *mt_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes);
284void  mt_item_flush_expired(void);
285item *mt_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked);
286int   mt_item_link(item *it);
287void  mt_item_remove(item *it);
288int   mt_item_replace(item *it, item *new_it);
289char *mt_item_stats(int *bytes);
290char *mt_item_stats_sizes(int *bytes);
291void  mt_item_unlink(item *it);
292void  mt_item_update(item *it);
293void  mt_run_deferred_deletes(void);
294void *mt_slabs_alloc(size_t size, unsigned int id);
295void  mt_slabs_free(void *ptr, size_t size);
296int   mt_slabs_reassign(unsigned char srcid, unsigned char dstid);
297char *mt_slabs_stats(int *buflen);
298void  mt_stats_lock(void);     /* reached through STATS_LOCK() in threaded builds */
299void  mt_stats_unlock(void);   /* reached through STATS_UNLOCK() in threaded builds */
300int   mt_store_item(item *item, int comm);
301
302
303# define add_delta(x,y,z,a)          mt_add_delta(x,y,z,a)   /* threaded build: each plain name maps to its "mt_" wrapper */
304# define assoc_move_next_bucket()    mt_assoc_move_next_bucket()
305# define conn_from_freelist()        mt_conn_from_freelist()
306# define conn_add_to_freelist(x)     mt_conn_add_to_freelist(x)
307# define suffix_from_freelist()      mt_suffix_from_freelist()
308# define suffix_add_to_freelist(x)   mt_suffix_add_to_freelist(x)
309# define defer_delete(x,y)           mt_defer_delete(x,y)
310# define is_listen_thread()          mt_is_listen_thread()
311# define item_alloc(x,y,z,a,b)       mt_item_alloc(x,y,z,a,b)
312# define item_cachedump(x,y,z)       mt_item_cachedump(x,y,z)
313# define item_flush_expired()        mt_item_flush_expired()
314# define item_get_notedeleted(x,y,z) mt_item_get_notedeleted(x,y,z)
315# define item_link(x)                mt_item_link(x)
316# define item_remove(x)              mt_item_remove(x)
317# define item_replace(x,y)           mt_item_replace(x,y)
318# define item_stats(x)               mt_item_stats(x)
319# define item_stats_sizes(x)         mt_item_stats_sizes(x)
320# define item_update(x)              mt_item_update(x)
321# define item_unlink(x)              mt_item_unlink(x)
322# define run_deferred_deletes()      mt_run_deferred_deletes()
323# define slabs_alloc(x,y)            mt_slabs_alloc(x,y)
324# define slabs_free(x,y)             mt_slabs_free(x,y)
325# define slabs_reassign(x,y)         mt_slabs_reassign(x,y)
326# define slabs_stats(x)              mt_slabs_stats(x)
327# define store_item(x,y)             mt_store_item(x,y)
328
329# define STATS_LOCK()                mt_stats_lock()     /* real calls here; empty in the single-threaded branch */
330# define STATS_UNLOCK()              mt_stats_unlock()
331
332#else /* !USE_THREADS */
333
334# define add_delta(x,y,z,a)          do_add_delta(x,y,z,a)   /* single-threaded build: each plain name maps straight to its "do_" function */
335# define assoc_move_next_bucket()    do_assoc_move_next_bucket()
336# define conn_from_freelist()        do_conn_from_freelist()
337# define conn_add_to_freelist(x)     do_conn_add_to_freelist(x)
338# define suffix_from_freelist()      do_suffix_from_freelist()
339# define suffix_add_to_freelist(x)   do_suffix_add_to_freelist(x)
340# define defer_delete(x,y)           do_defer_delete(x,y)
341# define dispatch_conn_new(x,y,z,a,b) conn_new(x,y,z,a,b,main_base)   /* 'main_base' is not a macro parameter: it must be visible at the call site */
342# define dispatch_event_add(t,c)     event_add(&(c)->event, 0)   /* the thread index t is ignored */
343# define is_listen_thread()          1   /* always 1 in this branch */
344# define item_alloc(x,y,z,a,b)       do_item_alloc(x,y,z,a,b)
345# define item_cachedump(x,y,z)       do_item_cachedump(x,y,z)
346# define item_flush_expired()        do_item_flush_expired()
347# define item_get_notedeleted(x,y,z) do_item_get_notedeleted(x,y,z)
348# define item_link(x)                do_item_link(x)
349# define item_remove(x)              do_item_remove(x)
350# define item_replace(x,y)           do_item_replace(x,y)
351# define item_stats(x)               do_item_stats(x)
352# define item_stats_sizes(x)         do_item_stats_sizes(x)
353# define item_unlink(x)              do_item_unlink(x)
354# define item_update(x)              do_item_update(x)
355# define run_deferred_deletes()      do_run_deferred_deletes()
356# define slabs_alloc(x,y)            do_slabs_alloc(x,y)
357# define slabs_free(x,y)             do_slabs_free(x,y)
358# define slabs_reassign(x,y)         do_slabs_reassign(x,y)
359# define slabs_stats(x)              do_slabs_stats(x)
360# define store_item(x,y)             do_store_item(x,y)
361# define thread_init(x,y)            0   /* both arguments are discarded; the expression is just 0 */
362
363# define STATS_LOCK()                /* expands to nothing */
364# define STATS_UNLOCK()              /* expands to nothing */
365
366#endif /* !USE_THREADS */
367
368/* If supported, give compiler hints for branch prediction. Without GCC, or with GCC 2 before 2.96, __builtin_expect is stubbed to (x). */
369#if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
370#define __builtin_expect(x, expected_value) (x)
371#endif
372
373#define likely(x)       __builtin_expect((x),1)   /* NOTE(review): x is passed through unnormalized; GCC documents the builtin as taking long, so callers should pass an integer expression */
374#define unlikely(x)     __builtin_expect((x),0)   /* hint that x is expected to be 0 */
Note: See TracBrowser for help on using the browser.