| | varnish-cache/bin/varnishd/cache/cache_acceptor.c |
0 |
|
/*- |
1 |
|
* Copyright (c) 2006 Verdens Gang AS |
2 |
|
* Copyright (c) 2006-2015 Varnish Software AS |
3 |
|
* All rights reserved. |
4 |
|
* |
5 |
|
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
6 |
|
* |
7 |
|
* SPDX-License-Identifier: BSD-2-Clause |
8 |
|
* |
9 |
|
* Redistribution and use in source and binary forms, with or without |
10 |
|
* modification, are permitted provided that the following conditions |
11 |
|
* are met: |
12 |
|
* 1. Redistributions of source code must retain the above copyright |
13 |
|
* notice, this list of conditions and the following disclaimer. |
14 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
15 |
|
* notice, this list of conditions and the following disclaimer in the |
16 |
|
* documentation and/or other materials provided with the distribution. |
17 |
|
* |
18 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
22 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 |
|
* SUCH DAMAGE. |
29 |
|
* |
30 |
|
* This source file has the various trickery surrounding the accept/listen |
31 |
|
* sockets. |
32 |
|
* |
33 |
|
*/ |
34 |
|
|
35 |
|
#include "config.h" |
36 |
|
|
37 |
|
#include <stdlib.h> |
38 |
|
#include <netinet/in.h> |
39 |
|
#include <netinet/tcp.h> |
40 |
|
|
41 |
|
#include "cache_varnishd.h" |
42 |
|
|
43 |
|
#include "cache_transport.h" |
44 |
|
#include "cache_pool.h" |
45 |
|
#include "common/heritage.h" |
46 |
|
|
47 |
|
#include "vcli_serve.h" |
48 |
|
#include "vsa.h" |
49 |
|
#include "vtcp.h" |
50 |
|
#include "vtim.h" |
51 |
|
|
52 |
|
static pthread_t VCA_thread; |
53 |
|
static vtim_dur vca_pace = 0.0; |
54 |
|
static struct lock pace_mtx; |
55 |
|
static unsigned pool_accepting; |
56 |
|
static pthread_mutex_t shut_mtx = PTHREAD_MUTEX_INITIALIZER; |
57 |
|
|
58 |
|
/*
 * Record describing one accepted connection.  It is filled in by
 * vca_accept_task() and handed to vca_make_session().
 */
struct wrk_accept {
	unsigned		magic;
#define WRK_ACCEPT_MAGIC	0x8c4b4d59

	/* Results of accept(2) */
	struct sockaddr_storage	acceptaddr;	/* peer address */
	socklen_t		acceptaddrlen;	/* length of acceptaddr */
	int			acceptsock;	/* accepted descriptor */
	struct listen_sock	*acceptlsock;	/* originating listen socket */
};
68 |
|
|
69 |
|
struct poolsock { |
70 |
|
unsigned magic; |
71 |
|
#define POOLSOCK_MAGIC 0x1b0a2d38 |
72 |
|
VTAILQ_ENTRY(poolsock) list; |
73 |
|
struct listen_sock *lsock; |
74 |
|
struct pool_task task[1]; |
75 |
|
struct pool *pool; |
76 |
|
}; |
77 |
|
|
78 |
|
/*-------------------------------------------------------------------- |
79 |
|
* TCP options we want to control |
80 |
|
*/ |
81 |
|
|
82 |
|
/* Storage large enough for any option value kept in sock_opts[]. */
union sock_arg {
	struct linger	lg;	/* SO_LINGER */
	struct timeval	tv;	/* SO_SNDTIMEO / SO_RCVTIMEO */
	int		i;	/* integer valued options */
};
87 |
|
|
88 |
|
static struct sock_opt { |
89 |
|
int level; |
90 |
|
int optname; |
91 |
|
const char *strname; |
92 |
|
unsigned mod; |
93 |
|
socklen_t sz; |
94 |
|
union sock_arg arg[1]; |
95 |
|
} sock_opts[] = { |
96 |
|
/* Note: Setting the mod counter to something not-zero is needed |
97 |
|
* to force the setsockopt() calls on startup */ |
98 |
|
#define SOCK_OPT(lvl, nam, typ) { lvl, nam, #nam, 1, sizeof(typ) }, |
99 |
|
|
100 |
|
SOCK_OPT(SOL_SOCKET, SO_LINGER, struct linger) |
101 |
|
SOCK_OPT(SOL_SOCKET, SO_KEEPALIVE, int) |
102 |
|
SOCK_OPT(SOL_SOCKET, SO_SNDTIMEO, struct timeval) |
103 |
|
SOCK_OPT(SOL_SOCKET, SO_RCVTIMEO, struct timeval) |
104 |
|
|
105 |
|
SOCK_OPT(IPPROTO_TCP, TCP_NODELAY, int) |
106 |
|
|
107 |
|
#if defined(HAVE_TCP_KEEP) |
108 |
|
SOCK_OPT(IPPROTO_TCP, TCP_KEEPIDLE, int) |
109 |
|
SOCK_OPT(IPPROTO_TCP, TCP_KEEPCNT, int) |
110 |
|
SOCK_OPT(IPPROTO_TCP, TCP_KEEPINTVL, int) |
111 |
|
#elif defined(HAVE_TCP_KEEPALIVE) |
112 |
|
SOCK_OPT(IPPROTO_TCP, TCP_KEEPALIVE, int) |
113 |
|
#endif |
114 |
|
|
115 |
|
#undef SOCK_OPT |
116 |
|
}; |
117 |
|
|
118 |
|
static const int n_sock_opts = sizeof sock_opts / sizeof sock_opts[0]; |
119 |
|
|
120 |
|
/* Per listen socket, per option bookkeeping (indexed like sock_opts[]). */
struct conn_heritage {
	unsigned	sess_set;	/* option must be set on accepted sockets */
	unsigned	listen_mod;	/* sock_opt.mod last applied to the listen socket */
};
124 |
|
|
125 |
|
/*-------------------------------------------------------------------- |
126 |
|
* We want to get out of any kind of trouble-hit TCP connections as fast |
127 |
|
* as absolutely possible, so we set them LINGER disabled, so that even if |
128 |
|
* there are outstanding write data on the socket, a close(2) will return |
129 |
|
* immediately. |
130 |
|
*/ |
131 |
|
static const struct linger disable_so_linger = {
	.l_onoff	= 0,
};

/*
 * Keepalives are enabled by default; they help detect clients which
 * hung up on connections returning from waiting lists.
 */
static const unsigned enable_so_keepalive = 1;

/* Nagle's algorithm is disabled in favor of low latency setups. */
static const unsigned enable_tcp_nodelay = 1;
146 |
|
|
147 |
|
/*-------------------------------------------------------------------- |
148 |
|
* lacking a better place, we put some generic periodic updates |
149 |
|
* into the vca_acct() loop which we are running anyway |
150 |
|
*/ |
151 |
|
static void |
152 |
41917 |
vca_periodic(vtim_real t0) |
153 |
|
{ |
154 |
|
vtim_real now; |
155 |
|
|
156 |
41917 |
now = VTIM_real(); |
157 |
41917 |
VSC_C_main->uptime = (uint64_t)(now - t0); |
158 |
|
|
159 |
41917 |
VTIM_postel = FEATURE(FEATURE_HTTP_DATE_POSTEL); |
160 |
41917 |
} |
161 |
|
|
162 |
|
/*-------------------------------------------------------------------- |
163 |
|
* Some kernels have bugs/limitations with respect to which options are |
164 |
|
* inherited from the accept/listen socket, so we have to keep track of |
165 |
|
* which, if any, sockopts we have to set on the accepted socket. |
166 |
|
*/ |
167 |
|
|
168 |
|
static int |
169 |
41917 |
vca_sock_opt_init(void) |
170 |
|
{ |
171 |
|
struct sock_opt *so; |
172 |
|
union sock_arg tmp; |
173 |
41917 |
int n, chg = 0; |
174 |
|
size_t sz; |
175 |
|
|
176 |
41917 |
memset(&tmp, 0, sizeof tmp); |
177 |
|
|
178 |
377253 |
for (n = 0; n < n_sock_opts; n++) { |
179 |
335336 |
so = &sock_opts[n]; |
180 |
|
|
181 |
|
#define SET_VAL(nm, so, fld, val) \ |
182 |
|
do { \ |
183 |
|
if (!strcmp(#nm, so->strname)) { \ |
184 |
|
assert(so->sz == sizeof so->arg->fld); \ |
185 |
|
so->arg->fld = (val); \ |
186 |
|
} \ |
187 |
|
} while (0) |
188 |
|
|
189 |
|
#define NEW_VAL(nm, so, fld, val) \ |
190 |
|
do { \ |
191 |
|
if (!strcmp(#nm, so->strname)) { \ |
192 |
|
sz = sizeof tmp.fld; \ |
193 |
|
assert(so->sz == sz); \ |
194 |
|
tmp.fld = (val); \ |
195 |
|
if (memcmp(&so->arg->fld, &(tmp.fld), sz)) { \ |
196 |
|
memcpy(&so->arg->fld, &(tmp.fld), sz); \ |
197 |
|
so->mod++; \ |
198 |
|
chg = 1; \ |
199 |
|
} \ |
200 |
|
} \ |
201 |
|
} while (0) |
202 |
|
|
203 |
335336 |
SET_VAL(SO_LINGER, so, lg, disable_so_linger); |
204 |
335336 |
SET_VAL(SO_KEEPALIVE, so, i, enable_so_keepalive); |
205 |
335336 |
NEW_VAL(SO_SNDTIMEO, so, tv, |
206 |
|
VTIM_timeval(cache_param->idle_send_timeout)); |
207 |
335336 |
NEW_VAL(SO_RCVTIMEO, so, tv, |
208 |
|
VTIM_timeval(cache_param->timeout_idle)); |
209 |
335336 |
SET_VAL(TCP_NODELAY, so, i, enable_tcp_nodelay); |
210 |
|
#if defined(HAVE_TCP_KEEP) |
211 |
335336 |
NEW_VAL(TCP_KEEPIDLE, so, i, |
212 |
|
(int)cache_param->tcp_keepalive_time); |
213 |
335336 |
NEW_VAL(TCP_KEEPCNT, so, i, |
214 |
|
(int)cache_param->tcp_keepalive_probes); |
215 |
335336 |
NEW_VAL(TCP_KEEPINTVL, so, i, |
216 |
|
(int)cache_param->tcp_keepalive_intvl); |
217 |
|
#elif defined(HAVE_TCP_KEEPALIVE) |
218 |
|
NEW_VAL(TCP_KEEPALIVE, so, i, |
219 |
|
(int)cache_param->tcp_keepalive_time); |
220 |
|
#endif |
221 |
335336 |
} |
222 |
41917 |
return (chg); |
223 |
|
} |
224 |
|
|
225 |
|
static void |
226 |
20877 |
vca_sock_opt_test(const struct listen_sock *ls, const struct sess *sp) |
227 |
|
{ |
228 |
|
struct conn_heritage *ch; |
229 |
|
struct sock_opt *so; |
230 |
|
union sock_arg tmp; |
231 |
|
socklen_t l; |
232 |
|
int i, n; |
233 |
|
|
234 |
20877 |
CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC); |
235 |
20877 |
CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); |
236 |
|
|
237 |
187853 |
for (n = 0; n < n_sock_opts; n++) { |
238 |
167042 |
so = &sock_opts[n]; |
239 |
167042 |
ch = &ls->conn_heritage[n]; |
240 |
167042 |
if (ch->sess_set) { |
241 |
1380 |
VSL(SLT_Debug, sp->vxid, |
242 |
|
"sockopt: Not testing nonhereditary %s for %s=%s", |
243 |
690 |
so->strname, ls->name, ls->endpoint); |
244 |
690 |
continue; |
245 |
|
} |
246 |
166352 |
if (so->level == IPPROTO_TCP && ls->uds) { |
247 |
4880 |
VSL(SLT_Debug, sp->vxid, |
248 |
|
"sockopt: Not testing incompatible %s for %s=%s", |
249 |
2440 |
so->strname, ls->name, ls->endpoint); |
250 |
2440 |
continue; |
251 |
|
} |
252 |
163912 |
memset(&tmp, 0, sizeof tmp); |
253 |
163912 |
l = so->sz; |
254 |
163912 |
i = getsockopt(sp->fd, so->level, so->optname, &tmp, &l); |
255 |
163912 |
if (i == 0 && memcmp(&tmp, so->arg, so->sz)) { |
256 |
80908 |
VSL(SLT_Debug, sp->vxid, |
257 |
|
"sockopt: Test confirmed %s non heredity for %s=%s", |
258 |
40454 |
so->strname, ls->name, ls->endpoint); |
259 |
40454 |
ch->sess_set = 1; |
260 |
40454 |
} |
261 |
163912 |
if (i && errno != ENOPROTOOPT) |
262 |
0 |
VTCP_Assert(i); |
263 |
163846 |
} |
264 |
20811 |
} |
265 |
|
|
266 |
|
static void |
267 |
72816 |
vca_sock_opt_set(const struct listen_sock *ls, const struct sess *sp) |
268 |
|
{ |
269 |
|
struct conn_heritage *ch; |
270 |
|
struct sock_opt *so; |
271 |
|
vxid_t vxid; |
272 |
|
int n, sock; |
273 |
|
|
274 |
72816 |
CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC); |
275 |
|
|
276 |
72816 |
if (sp != NULL) { |
277 |
50464 |
CHECK_OBJ(sp, SESS_MAGIC); |
278 |
50464 |
sock = sp->fd; |
279 |
50464 |
vxid = sp->vxid; |
280 |
50464 |
} else { |
281 |
22352 |
sock = ls->sock; |
282 |
22352 |
vxid = NO_VXID; |
283 |
|
} |
284 |
|
|
285 |
655308 |
for (n = 0; n < n_sock_opts; n++) { |
286 |
582492 |
so = &sock_opts[n]; |
287 |
582492 |
ch = &ls->conn_heritage[n]; |
288 |
582492 |
if (so->level == IPPROTO_TCP && ls->uds) { |
289 |
26528 |
VSL(SLT_Debug, vxid, |
290 |
|
"sockopt: Not setting incompatible %s for %s=%s", |
291 |
26528 |
so->strname, ls->name, ls->endpoint); |
292 |
26528 |
continue; |
293 |
|
} |
294 |
555964 |
if (sp == NULL && ch->listen_mod == so->mod) { |
295 |
789 |
VSL(SLT_Debug, vxid, |
296 |
|
"sockopt: Not setting unmodified %s for %s=%s", |
297 |
789 |
so->strname, ls->name, ls->endpoint); |
298 |
789 |
continue; |
299 |
|
} |
300 |
555175 |
if (sp != NULL && !ch->sess_set) { |
301 |
569616 |
VSL(SLT_Debug, sp->vxid, |
302 |
|
"sockopt: %s may be inherited for %s=%s", |
303 |
284808 |
so->strname, ls->name, ls->endpoint); |
304 |
284808 |
continue; |
305 |
|
} |
306 |
270367 |
VSL(SLT_Debug, vxid, |
307 |
|
"sockopt: Setting %s for %s=%s", |
308 |
270367 |
so->strname, ls->name, ls->endpoint); |
309 |
270367 |
VTCP_Assert(setsockopt(sock, |
310 |
|
so->level, so->optname, so->arg, so->sz)); |
311 |
270367 |
if (sp == NULL) |
312 |
175427 |
ch->listen_mod = so->mod; |
313 |
270367 |
} |
314 |
72816 |
} |
315 |
|
|
316 |
|
/*-------------------------------------------------------------------- |
317 |
|
* If accept(2)'ing fails, we pace ourselves to relieve any resource |
318 |
|
* shortage if possible. |
319 |
|
*/ |
320 |
|
|
321 |
|
static void |
322 |
94381 |
vca_pace_check(void) |
323 |
|
{ |
324 |
|
vtim_dur p; |
325 |
|
|
326 |
94381 |
if (vca_pace == 0.0) |
327 |
94381 |
return; |
328 |
0 |
Lck_Lock(&pace_mtx); |
329 |
0 |
p = vca_pace; |
330 |
0 |
Lck_Unlock(&pace_mtx); |
331 |
0 |
if (p > 0.0) |
332 |
0 |
VTIM_sleep(p); |
333 |
94381 |
} |
334 |
|
|
335 |
|
static void |
336 |
0 |
vca_pace_bad(void) |
337 |
|
{ |
338 |
|
|
339 |
0 |
Lck_Lock(&pace_mtx); |
340 |
0 |
vca_pace += cache_param->acceptor_sleep_incr; |
341 |
0 |
if (vca_pace > cache_param->acceptor_sleep_max) |
342 |
0 |
vca_pace = cache_param->acceptor_sleep_max; |
343 |
0 |
Lck_Unlock(&pace_mtx); |
344 |
0 |
} |
345 |
|
|
346 |
|
static void |
347 |
50465 |
vca_pace_good(void) |
348 |
|
{ |
349 |
|
|
350 |
50465 |
if (vca_pace == 0.0) |
351 |
50465 |
return; |
352 |
0 |
Lck_Lock(&pace_mtx); |
353 |
0 |
vca_pace *= cache_param->acceptor_sleep_decay; |
354 |
0 |
if (vca_pace < cache_param->acceptor_sleep_incr) |
355 |
0 |
vca_pace = 0.0; |
356 |
0 |
Lck_Unlock(&pace_mtx); |
357 |
50465 |
} |
358 |
|
|
359 |
|
/*-------------------------------------------------------------------- |
360 |
|
* The pool-task for a newly accepted session |
361 |
|
* |
362 |
|
* Called from assigned worker thread |
363 |
|
*/ |
364 |
|
|
365 |
|
static void |
366 |
44475 |
vca_mk_tcp(const struct wrk_accept *wa, |
367 |
|
struct sess *sp, char *laddr, char *lport, char *raddr, char *rport) |
368 |
|
{ |
369 |
44475 |
struct suckaddr *sa = NULL; |
370 |
|
ssize_t sz; |
371 |
|
|
372 |
44475 |
AN(SES_Reserve_remote_addr(sp, &sa, &sz)); |
373 |
44475 |
AN(sa); |
374 |
44475 |
assert(sz == vsa_suckaddr_len); |
375 |
44475 |
AN(VSA_Build(sa, &wa->acceptaddr, wa->acceptaddrlen)); |
376 |
44475 |
sp->sattr[SA_CLIENT_ADDR] = sp->sattr[SA_REMOTE_ADDR]; |
377 |
|
|
378 |
44475 |
VTCP_name(sa, raddr, VTCP_ADDRBUFSIZE, rport, VTCP_PORTBUFSIZE); |
379 |
44475 |
AN(SES_Set_String_Attr(sp, SA_CLIENT_IP, raddr)); |
380 |
44475 |
AN(SES_Set_String_Attr(sp, SA_CLIENT_PORT, rport)); |
381 |
|
|
382 |
|
|
383 |
44475 |
AN(SES_Reserve_local_addr(sp, &sa, &sz)); |
384 |
44475 |
AN(VSA_getsockname(sp->fd, sa, sz)); |
385 |
44475 |
sp->sattr[SA_SERVER_ADDR] = sp->sattr[SA_LOCAL_ADDR]; |
386 |
44475 |
VTCP_name(sa, laddr, VTCP_ADDRBUFSIZE, lport, VTCP_PORTBUFSIZE); |
387 |
44475 |
} |
388 |
|
|
389 |
|
static void |
390 |
5982 |
vca_mk_uds(struct wrk_accept *wa, struct sess *sp, char *laddr, char *lport, |
391 |
|
char *raddr, char *rport) |
392 |
|
{ |
393 |
5982 |
struct suckaddr *sa = NULL; |
394 |
|
ssize_t sz; |
395 |
|
|
396 |
5982 |
(void) wa; |
397 |
5982 |
AN(SES_Reserve_remote_addr(sp, &sa, &sz)); |
398 |
5982 |
AN(sa); |
399 |
5982 |
assert(sz == vsa_suckaddr_len); |
400 |
5982 |
AZ(SES_Set_remote_addr(sp, bogo_ip)); |
401 |
5982 |
sp->sattr[SA_CLIENT_ADDR] = sp->sattr[SA_REMOTE_ADDR]; |
402 |
5982 |
sp->sattr[SA_LOCAL_ADDR] = sp->sattr[SA_REMOTE_ADDR]; |
403 |
5982 |
sp->sattr[SA_SERVER_ADDR] = sp->sattr[SA_REMOTE_ADDR]; |
404 |
5982 |
AN(SES_Set_String_Attr(sp, SA_CLIENT_IP, "0.0.0.0")); |
405 |
5982 |
AN(SES_Set_String_Attr(sp, SA_CLIENT_PORT, "0")); |
406 |
|
|
407 |
5982 |
strcpy(laddr, "0.0.0.0"); |
408 |
5982 |
strcpy(raddr, "0.0.0.0"); |
409 |
5982 |
strcpy(lport, "0"); |
410 |
5982 |
strcpy(rport, "0"); |
411 |
5982 |
} |
412 |
|
|
413 |
|
static void v_matchproto_(task_func_t) |
414 |
50464 |
vca_make_session(struct worker *wrk, void *arg) |
415 |
|
{ |
416 |
|
struct sess *sp; |
417 |
|
struct req *req; |
418 |
|
struct wrk_accept *wa; |
419 |
|
char laddr[VTCP_ADDRBUFSIZE]; |
420 |
|
char lport[VTCP_PORTBUFSIZE]; |
421 |
|
char raddr[VTCP_ADDRBUFSIZE]; |
422 |
|
char rport[VTCP_PORTBUFSIZE]; |
423 |
|
|
424 |
50464 |
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); |
425 |
50464 |
CAST_OBJ_NOTNULL(wa, arg, WRK_ACCEPT_MAGIC); |
426 |
|
|
427 |
50464 |
VTCP_blocking(wa->acceptsock); |
428 |
|
|
429 |
|
/* Turn accepted socket into a session */ |
430 |
50464 |
AN(WS_Reservation(wrk->aws)); |
431 |
50464 |
sp = SES_New(wrk->pool); |
432 |
50464 |
CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); |
433 |
50464 |
wrk->stats->s_sess++; |
434 |
|
|
435 |
50464 |
sp->t_open = VTIM_real(); |
436 |
50464 |
sp->t_idle = sp->t_open; |
437 |
50464 |
sp->vxid = VXID_Get(wrk, VSL_CLIENTMARKER); |
438 |
|
|
439 |
50464 |
sp->fd = wa->acceptsock; |
440 |
50464 |
wa->acceptsock = -1; |
441 |
50464 |
sp->listen_sock = wa->acceptlsock; |
442 |
|
|
443 |
50464 |
assert((size_t)wa->acceptaddrlen <= vsa_suckaddr_len); |
444 |
|
|
445 |
50464 |
if (wa->acceptlsock->uds) |
446 |
5982 |
vca_mk_uds(wa, sp, laddr, lport, raddr, rport); |
447 |
|
else |
448 |
44482 |
vca_mk_tcp(wa, sp, laddr, lport, raddr, rport); |
449 |
|
|
450 |
50464 |
AN(wa->acceptlsock->name); |
451 |
100928 |
VSL(SLT_Begin, sp->vxid, "sess 0 %s", |
452 |
50464 |
wa->acceptlsock->transport->name); |
453 |
100928 |
VSL(SLT_SessOpen, sp->vxid, "%s %s %s %s %s %.6f %d", |
454 |
50464 |
raddr, rport, wa->acceptlsock->name, laddr, lport, |
455 |
50464 |
sp->t_open, sp->fd); |
456 |
|
|
457 |
50464 |
vca_pace_good(); |
458 |
50464 |
wrk->stats->sess_conn++; |
459 |
|
|
460 |
50464 |
if (wa->acceptlsock->test_heritage) { |
461 |
20877 |
vca_sock_opt_test(wa->acceptlsock, sp); |
462 |
20877 |
wa->acceptlsock->test_heritage = 0; |
463 |
20877 |
} |
464 |
50464 |
vca_sock_opt_set(wa->acceptlsock, sp); |
465 |
|
|
466 |
50464 |
req = Req_New(sp); |
467 |
50464 |
CHECK_OBJ_NOTNULL(req, REQ_MAGIC); |
468 |
50464 |
req->htc->rfd = &sp->fd; |
469 |
|
|
470 |
50464 |
SES_SetTransport(wrk, sp, req, wa->acceptlsock->transport); |
471 |
50464 |
WS_Release(wrk->aws, 0); |
472 |
50464 |
} |
473 |
|
|
474 |
|
/*-------------------------------------------------------------------- |
475 |
|
* This function accepts on a single socket for a single thread pool. |
476 |
|
* |
477 |
|
* As long as we can stick the accepted connection to another thread |
478 |
|
* we do so, otherwise we put the socket back on the "BACK" pool |
479 |
|
* and handle the new connection ourselves. |
480 |
|
*/ |
481 |
|
|
482 |
|
static void v_matchproto_(task_func_t) |
483 |
83 |
vca_accept_task(struct worker *wrk, void *arg) |
484 |
|
{ |
485 |
|
struct wrk_accept wa; |
486 |
|
struct poolsock *ps; |
487 |
|
struct listen_sock *ls; |
488 |
|
int i; |
489 |
|
char laddr[VTCP_ADDRBUFSIZE]; |
490 |
|
char lport[VTCP_PORTBUFSIZE]; |
491 |
|
|
492 |
83 |
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); |
493 |
83 |
CAST_OBJ_NOTNULL(ps, arg, POOLSOCK_MAGIC); |
494 |
83 |
ls = ps->lsock; |
495 |
83 |
CHECK_OBJ_NOTNULL(ls, LISTEN_SOCK_MAGIC); |
496 |
|
|
497 |
39651 |
while (!pool_accepting) |
498 |
39568 |
VTIM_sleep(.1); |
499 |
|
|
500 |
|
/* Dont hold on to (possibly) discarded VCLs */ |
501 |
83 |
if (wrk->wpriv->vcl != NULL) |
502 |
2 |
VCL_Rel(&wrk->wpriv->vcl); |
503 |
|
|
504 |
50487 |
while (!ps->pool->die) { |
505 |
50437 |
INIT_OBJ(&wa, WRK_ACCEPT_MAGIC); |
506 |
50437 |
wa.acceptlsock = ls; |
507 |
|
|
508 |
50437 |
vca_pace_check(); |
509 |
|
|
510 |
50437 |
wa.acceptaddrlen = sizeof wa.acceptaddr; |
511 |
50437 |
do { |
512 |
188758 |
i = accept(ls->sock, (void*)&wa.acceptaddr, |
513 |
94379 |
&wa.acceptaddrlen); |
514 |
94379 |
} while (i < 0 && errno == EAGAIN && !ps->pool->die); |
515 |
|
|
516 |
50437 |
if (i < 0 && ps->pool->die) |
517 |
0 |
break; |
518 |
|
|
519 |
50437 |
if (i < 0 && ls->sock == -2) { |
520 |
|
/* Shut down in progress */ |
521 |
0 |
sleep(2); |
522 |
0 |
continue; |
523 |
|
} |
524 |
|
|
525 |
50437 |
if (i < 0) { |
526 |
0 |
switch (errno) { |
527 |
|
case ECONNABORTED: |
528 |
0 |
wrk->stats->sess_fail_econnaborted++; |
529 |
0 |
break; |
530 |
|
case EINTR: |
531 |
0 |
wrk->stats->sess_fail_eintr++; |
532 |
0 |
break; |
533 |
|
case EMFILE: |
534 |
0 |
wrk->stats->sess_fail_emfile++; |
535 |
0 |
vca_pace_bad(); |
536 |
0 |
break; |
537 |
|
case EBADF: |
538 |
0 |
wrk->stats->sess_fail_ebadf++; |
539 |
0 |
vca_pace_bad(); |
540 |
0 |
break; |
541 |
|
case ENOBUFS: |
542 |
|
case ENOMEM: |
543 |
0 |
wrk->stats->sess_fail_enomem++; |
544 |
0 |
vca_pace_bad(); |
545 |
0 |
break; |
546 |
|
default: |
547 |
0 |
wrk->stats->sess_fail_other++; |
548 |
0 |
vca_pace_bad(); |
549 |
0 |
break; |
550 |
|
} |
551 |
|
|
552 |
0 |
i = errno; |
553 |
0 |
wrk->stats->sess_fail++; |
554 |
|
|
555 |
0 |
if (wa.acceptlsock->uds) { |
556 |
0 |
bstrcpy(laddr, "0.0.0.0"); |
557 |
0 |
bstrcpy(lport, "0"); |
558 |
0 |
} else { |
559 |
0 |
VTCP_myname(ls->sock, laddr, VTCP_ADDRBUFSIZE, |
560 |
0 |
lport, VTCP_PORTBUFSIZE); |
561 |
|
} |
562 |
|
|
563 |
0 |
VSL(SLT_SessError, NO_VXID, "%s %s %s %d %d \"%s\"", |
564 |
0 |
wa.acceptlsock->name, laddr, lport, |
565 |
0 |
ls->sock, i, VAS_errtxt(i)); |
566 |
0 |
(void)Pool_TrySumstat(wrk); |
567 |
0 |
continue; |
568 |
|
} |
569 |
|
|
570 |
50437 |
wa.acceptsock = i; |
571 |
|
|
572 |
50437 |
if (!Pool_Task_Arg(wrk, TASK_QUEUE_REQ, |
573 |
|
vca_make_session, &wa, sizeof wa)) { |
574 |
|
/* |
575 |
|
* We couldn't get another thread, so we will handle |
576 |
|
* the request in this worker thread, but first we |
577 |
|
* must reschedule the listening task so it will be |
578 |
|
* taken up by another thread again. |
579 |
|
*/ |
580 |
83 |
if (!ps->pool->die) { |
581 |
33 |
AZ(Pool_Task(wrk->pool, ps->task, |
582 |
|
TASK_QUEUE_VCA)); |
583 |
33 |
return; |
584 |
|
} |
585 |
50 |
} |
586 |
50404 |
if (!ps->pool->die && DO_DEBUG(DBG_SLOW_ACCEPTOR)) |
587 |
150 |
VTIM_sleep(2.0); |
588 |
|
|
589 |
|
} |
590 |
|
|
591 |
50 |
VSL(SLT_Debug, NO_VXID, "XXX Accept thread dies %p", ps); |
592 |
50 |
FREE_OBJ(ps); |
593 |
83 |
} |
594 |
|
|
595 |
|
/*-------------------------------------------------------------------- |
596 |
|
* Called when a worker and attached thread pool is created, to |
597 |
|
* allocate the tasks which will listen to sockets for that pool. |
598 |
|
*/ |
599 |
|
|
600 |
|
void |
601 |
43944 |
VCA_NewPool(struct pool *pp) |
602 |
|
{ |
603 |
|
struct listen_sock *ls; |
604 |
|
struct poolsock *ps; |
605 |
|
|
606 |
88284 |
VTAILQ_FOREACH(ls, &heritage.socks, list) { |
607 |
44340 |
ALLOC_OBJ(ps, POOLSOCK_MAGIC); |
608 |
44340 |
AN(ps); |
609 |
44340 |
ps->lsock = ls; |
610 |
44340 |
ps->task->func = vca_accept_task; |
611 |
44340 |
ps->task->priv = ps; |
612 |
44340 |
ps->pool = pp; |
613 |
44340 |
VTAILQ_INSERT_TAIL(&pp->poolsocks, ps, list); |
614 |
44340 |
AZ(Pool_Task(pp, ps->task, TASK_QUEUE_VCA)); |
615 |
44340 |
} |
616 |
43944 |
} |
617 |
|
|
618 |
|
void |
619 |
50 |
VCA_DestroyPool(struct pool *pp) |
620 |
|
{ |
621 |
|
struct poolsock *ps; |
622 |
|
|
623 |
100 |
while (!VTAILQ_EMPTY(&pp->poolsocks)) { |
624 |
50 |
ps = VTAILQ_FIRST(&pp->poolsocks); |
625 |
50 |
VTAILQ_REMOVE(&pp->poolsocks, ps, list); |
626 |
|
} |
627 |
50 |
} |
628 |
|
|
629 |
|
/*--------------------------------------------------------------------*/ |
630 |
|
|
631 |
|
static void * v_matchproto_() |
632 |
22075 |
vca_acct(void *arg) |
633 |
|
{ |
634 |
|
struct listen_sock *ls; |
635 |
|
vtim_real t0; |
636 |
|
|
637 |
|
// XXX Actually a mis-nomer now because the accept happens in a pool |
638 |
|
// thread. Rename to accept-nanny or so? |
639 |
22075 |
THR_SetName("cache-acceptor"); |
640 |
22075 |
THR_Init(); |
641 |
22075 |
(void)arg; |
642 |
|
|
643 |
22075 |
t0 = VTIM_real(); |
644 |
22075 |
vca_periodic(t0); |
645 |
|
|
646 |
22075 |
pool_accepting = 1; |
647 |
|
|
648 |
41917 |
while (1) { |
649 |
41917 |
(void)sleep(1); |
650 |
41917 |
if (vca_sock_opt_init()) { |
651 |
22202 |
PTOK(pthread_mutex_lock(&shut_mtx)); |
652 |
22329 |
VTAILQ_FOREACH(ls, &heritage.socks, list) { |
653 |
22202 |
if (ls->sock == -2) |
654 |
0 |
continue; // VCA_Shutdown |
655 |
22202 |
assert (ls->sock > 0); |
656 |
127 |
vca_sock_opt_set(ls, NULL); |
657 |
|
/* If one of the options on a socket has |
658 |
|
* changed, also force a retest of whether |
659 |
|
* the values are inherited to the |
660 |
|
* accepted sockets. This should then |
661 |
|
* catch any false positives from previous |
662 |
|
* tests that could happen if the set |
663 |
|
* value of an option happened to just be |
664 |
|
* the OS default for that value, and |
665 |
|
* wasn't actually inherited from the |
666 |
|
* listening socket. */ |
667 |
127 |
ls->test_heritage = 1; |
668 |
127 |
} |
669 |
127 |
PTOK(pthread_mutex_unlock(&shut_mtx)); |
670 |
127 |
} |
671 |
19842 |
vca_periodic(t0); |
672 |
|
} |
673 |
|
NEEDLESS(return (NULL)); |
674 |
|
} |
675 |
|
|
676 |
|
/*--------------------------------------------------------------------*/ |
677 |
|
|
678 |
|
static void v_matchproto_(cli_func_t) |
679 |
22075 |
ccf_start(struct cli *cli, const char * const *av, void *priv) |
680 |
|
{ |
681 |
|
struct listen_sock *ls; |
682 |
|
|
683 |
22075 |
(void)cli; |
684 |
22075 |
(void)av; |
685 |
22075 |
(void)priv; |
686 |
|
|
687 |
22075 |
(void)vca_sock_opt_init(); |
688 |
|
|
689 |
44300 |
VTAILQ_FOREACH(ls, &heritage.socks, list) { |
690 |
22225 |
CHECK_OBJ_NOTNULL(ls->transport, TRANSPORT_MAGIC); |
691 |
22225 |
assert (ls->sock > 0); // We know where stdin is |
692 |
22225 |
if (cache_param->tcp_fastopen && |
693 |
0 |
VTCP_fastopen(ls->sock, cache_param->listen_depth)) |
694 |
0 |
VSL(SLT_Error, NO_VXID, |
695 |
|
"Kernel TCP Fast Open: sock=%d, errno=%d %s", |
696 |
0 |
ls->sock, errno, VAS_errtxt(errno)); |
697 |
22225 |
if (listen(ls->sock, cache_param->listen_depth)) { |
698 |
0 |
VCLI_SetResult(cli, CLIS_CANT); |
699 |
0 |
VCLI_Out(cli, "Listen failed on socket '%s': %s", |
700 |
0 |
ls->endpoint, VAS_errtxt(errno)); |
701 |
0 |
return; |
702 |
|
} |
703 |
22225 |
AZ(ls->conn_heritage); |
704 |
22225 |
ls->conn_heritage = calloc(n_sock_opts, |
705 |
|
sizeof *ls->conn_heritage); |
706 |
22225 |
AN(ls->conn_heritage); |
707 |
22225 |
ls->test_heritage = 1; |
708 |
22225 |
vca_sock_opt_set(ls, NULL); |
709 |
22225 |
if (cache_param->accept_filter && VTCP_filter_http(ls->sock)) |
710 |
44150 |
VSL(SLT_Error, NO_VXID, |
711 |
|
"Kernel filtering: sock=%d, errno=%d %s", |
712 |
22075 |
ls->sock, errno, VAS_errtxt(errno)); |
713 |
22225 |
} |
714 |
|
|
715 |
22075 |
PTOK(pthread_create(&VCA_thread, NULL, vca_acct, NULL)); |
716 |
22075 |
} |
717 |
|
|
718 |
|
/*--------------------------------------------------------------------*/ |
719 |
|
|
720 |
|
static void v_matchproto_(cli_func_t) |
721 |
43675 |
ccf_listen_address(struct cli *cli, const char * const *av, void *priv) |
722 |
|
{ |
723 |
|
struct listen_sock *ls; |
724 |
|
char h[VTCP_ADDRBUFSIZE], p[VTCP_PORTBUFSIZE]; |
725 |
|
|
726 |
43675 |
(void)cli; |
727 |
43675 |
(void)av; |
728 |
43675 |
(void)priv; |
729 |
|
|
730 |
|
/* |
731 |
|
* This CLI command is primarily used by varnishtest. Don't |
732 |
|
* respond until listen(2) has been called, in order to avoid |
733 |
|
* a race where varnishtest::client would attempt to connect(2) |
734 |
|
* before listen(2) has been called. |
735 |
|
*/ |
736 |
43675 |
while (!pool_accepting) |
737 |
0 |
VTIM_sleep(.1); |
738 |
|
|
739 |
43675 |
PTOK(pthread_mutex_lock(&shut_mtx)); |
740 |
87650 |
VTAILQ_FOREACH(ls, &heritage.socks, list) { |
741 |
43975 |
if (!ls->uds) { |
742 |
42725 |
VTCP_myname(ls->sock, h, sizeof h, p, sizeof p); |
743 |
42725 |
VCLI_Out(cli, "%s %s %s\n", ls->name, h, p); |
744 |
42725 |
} |
745 |
|
else |
746 |
1250 |
VCLI_Out(cli, "%s %s -\n", ls->name, ls->endpoint); |
747 |
43975 |
} |
748 |
43675 |
PTOK(pthread_mutex_unlock(&shut_mtx)); |
749 |
43675 |
} |
750 |
|
|
751 |
|
/*--------------------------------------------------------------------*/ |
752 |
|
|
753 |
|
static struct cli_proto vca_cmds[] = { |
754 |
|
{ CLICMD_SERVER_START, "", ccf_start }, |
755 |
|
{ CLICMD_DEBUG_LISTEN_ADDRESS, "d", ccf_listen_address }, |
756 |
|
{ NULL } |
757 |
|
}; |
758 |
|
|
759 |
|
void |
760 |
22194 |
VCA_Init(void) |
761 |
|
{ |
762 |
|
|
763 |
22194 |
CLI_AddFuncs(vca_cmds); |
764 |
22194 |
Lck_New(&pace_mtx, lck_vcapace); |
765 |
22194 |
} |
766 |
|
|
767 |
|
void |
768 |
21900 |
VCA_Shutdown(void) |
769 |
|
{ |
770 |
|
struct listen_sock *ls; |
771 |
|
int i; |
772 |
|
|
773 |
21900 |
PTOK(pthread_mutex_lock(&shut_mtx)); |
774 |
43950 |
VTAILQ_FOREACH(ls, &heritage.socks, list) { |
775 |
22050 |
i = ls->sock; |
776 |
22050 |
ls->sock = -2; |
777 |
22050 |
(void)close(i); |
778 |
22050 |
} |
779 |
21900 |
PTOK(pthread_mutex_unlock(&shut_mtx)); |
780 |
21900 |
} |
781 |
|
|
782 |
|
/*-------------------------------------------------------------------- |
783 |
|
* Transport protocol registration |
784 |
|
* |
785 |
|
*/ |
786 |
|
|
787 |
|
/* Registered transports, in registration order. */
static VTAILQ_HEAD(,transport) transports =
    VTAILQ_HEAD_INITIALIZER(transports);

/* Number handed to the most recently registered transport. */
static uint16_t next_xport;
791 |
|
|
792 |
|
static void |
793 |
72675 |
XPORT_Register(struct transport *xp) |
794 |
|
{ |
795 |
|
|
796 |
72675 |
CHECK_OBJ_NOTNULL(xp, TRANSPORT_MAGIC); |
797 |
72675 |
AZ(xp->number); |
798 |
|
|
799 |
72675 |
xp->number = ++next_xport; |
800 |
72675 |
VTAILQ_INSERT_TAIL(&transports, xp, list); |
801 |
72675 |
} |
802 |
|
|
803 |
|
void |
804 |
24225 |
XPORT_Init(void) |
805 |
|
{ |
806 |
|
|
807 |
24225 |
ASSERT_MGT(); |
808 |
|
|
809 |
|
#define TRANSPORT_MACRO(name) XPORT_Register(&name##_transport); |
810 |
24225 |
TRANSPORTS |
811 |
|
#undef TRANSPORT_MACRO |
812 |
24225 |
} |
813 |
|
|
814 |
|
const struct transport * |
815 |
23575 |
XPORT_Find(const char *name) |
816 |
|
{ |
817 |
|
const struct transport *xp; |
818 |
|
|
819 |
23575 |
ASSERT_MGT(); |
820 |
|
|
821 |
46800 |
VTAILQ_FOREACH(xp, &transports, list) |
822 |
46750 |
if (xp->proto_ident != NULL && |
823 |
46750 |
!strcasecmp(xp->proto_ident, name)) |
824 |
23525 |
return (xp); |
825 |
50 |
return (NULL); |
826 |
23575 |
} |
827 |
|
|
828 |
|
const struct transport * |
829 |
150 |
XPORT_ByNumber(uint16_t no) |
830 |
|
{ |
831 |
|
const struct transport *xp; |
832 |
|
|
833 |
325 |
VTAILQ_FOREACH(xp, &transports, list) |
834 |
325 |
if (xp->number == no) |
835 |
150 |
return (xp); |
836 |
0 |
return (NULL); |
837 |
150 |
} |