Fix reconnection hanging bugs

This commit is contained in:
Nicolas Viennot 2019-11-05 19:29:18 -05:00
parent fa49dc980d
commit c71307ed5c
4 changed files with 76 additions and 51 deletions

View File

@ -19,7 +19,7 @@ static int on_encoder_write(void *userdata, const char *buf, size_t len)
tmate_fatal("Cannot buffer encoded data"); tmate_fatal("Cannot buffer encoded data");
if (!encoder->ev_active) { if (!encoder->ev_active) {
event_active(&encoder->ev_buffer, EV_READ, 0); event_active(encoder->ev_buffer, EV_READ, 0);
encoder->ev_active = true; encoder->ev_active = true;
} }
@ -57,10 +57,12 @@ void tmate_encoder_init(struct tmate_encoder *encoder,
if (!encoder->buffer) if (!encoder->buffer)
tmate_fatal("Can't allocate buffer"); tmate_fatal("Can't allocate buffer");
event_set(&encoder->ev_buffer, -1, encoder->ev_buffer = event_new(tmate_session.ev_base, -1,
EV_READ | EV_PERSIST, on_encoder_buffer_ready, encoder); EV_READ | EV_PERSIST, on_encoder_buffer_ready, encoder);
if (!encoder->ev_buffer)
tmate_fatal("Can't allocate event");
event_add(&encoder->ev_buffer, NULL); event_add(encoder->ev_buffer, NULL);
encoder->ev_active = false; encoder->ev_active = false;
} }
@ -69,7 +71,8 @@ void tmate_encoder_destroy(struct tmate_encoder *encoder)
{ {
/* encoder->pk doesn't need any cleanup */ /* encoder->pk doesn't need any cleanup */
evbuffer_free(encoder->buffer); evbuffer_free(encoder->buffer);
event_del(&encoder->ev_buffer); event_del(encoder->ev_buffer);
event_free(encoder->ev_buffer);
memset(encoder, 0, sizeof(*encoder)); memset(encoder, 0, sizeof(*encoder));
} }

View File

@ -20,35 +20,50 @@ struct tmate_session tmate_session;
static void lookup_and_connect(void); static void lookup_and_connect(void);
static void on_dns_retry(__unused evutil_socket_t fd, __unused short what, static void on_dns_retry(__unused evutil_socket_t fd, __unused short what,
__unused void *arg) void *arg)
{ {
struct tmate_session *session = arg;
assert(session->ev_dns_retry);
event_free(session->ev_dns_retry);
session->ev_dns_retry = NULL;
lookup_and_connect(); lookup_and_connect();
} }
static void dns_cb(int errcode, struct evutil_addrinfo *addr, void *ptr) static void dns_cb(int errcode, struct evutil_addrinfo *addr, void *ptr)
{ {
struct evutil_addrinfo *ai; struct evutil_addrinfo *ai;
struct timeval tv;
const char *host = ptr; const char *host = ptr;
if (errcode) { if (errcode) {
struct tmate_session *session = &tmate_session;
if (session->ev_dns_retry)
return;
struct timeval tv = { .tv_sec = TMATE_DNS_RETRY_TIMEOUT, .tv_usec = 0 };
session->ev_dns_retry = evtimer_new(session->ev_base, on_dns_retry, session);
if (!session->ev_dns_retry)
tmate_fatal("out of memory");
evtimer_add(session->ev_dns_retry, &tv);
tmate_status_message("%s lookup failure. Retrying in %d seconds (%s)", tmate_status_message("%s lookup failure. Retrying in %d seconds (%s)",
host, TMATE_DNS_RETRY_TIMEOUT, host, TMATE_DNS_RETRY_TIMEOUT,
evutil_gai_strerror(errcode)); evutil_gai_strerror(errcode));
tv.tv_sec = TMATE_DNS_RETRY_TIMEOUT;
tv.tv_usec = 0;
evtimer_assign(&tmate_session.ev_dns_retry, tmate_session.ev_base,
on_dns_retry, NULL);
evtimer_add(&tmate_session.ev_dns_retry, &tv);
return; return;
} }
tmate_status_message("Connecting to %s...", host); tmate_status_message("Connecting to %s...", host);
for (ai = addr; ai; ai = ai->ai_next) { int i, num_clients = 0;
for (ai = addr; ai; ai = ai->ai_next)
num_clients++;
struct tmate_ssh_client *ssh_clients[num_clients];
for (ai = addr, i = 0; ai; ai = ai->ai_next, i++) {
char buf[128]; char buf[128];
const char *ip = NULL; const char *ip = NULL;
if (ai->ai_family == AF_INET) { if (ai->ai_family == AF_INET) {
@ -59,23 +74,16 @@ static void dns_cb(int errcode, struct evutil_addrinfo *addr, void *ptr)
ip = evutil_inet_ntop(AF_INET6, &sin6->sin6_addr, buf, 128); ip = evutil_inet_ntop(AF_INET6, &sin6->sin6_addr, buf, 128);
} }
tmate_debug("Trying server %s", ip); ssh_clients[i] = tmate_ssh_client_alloc(&tmate_session, ip);
/*
* Note: We don't deal with the client list. Clients manage it
* and free client structs when necessary.
*/
(void)tmate_ssh_client_alloc(&tmate_session, ip);
} }
for (i = 0; i < num_clients; i++)
connect_ssh_client(ssh_clients[i]);
evutil_freeaddrinfo(addr); evutil_freeaddrinfo(addr);
/* evdns_base_free(tmate_session.ev_dnsbase, 0);
* XXX For some reason, freeing the DNS resolver makes MacOSX flip out... tmate_session.ev_dnsbase = NULL;
* not sure what's going on...
* evdns_base_free(tmate_session.ev_dnsbase, 0);
* tmate_session.ev_dnsbase = NULL;
*/
} }
static void lookup_and_connect(void) static void lookup_and_connect(void)
@ -83,8 +91,8 @@ static void lookup_and_connect(void)
struct evutil_addrinfo hints; struct evutil_addrinfo hints;
const char *tmate_server_host; const char *tmate_server_host;
if (!tmate_session.ev_dnsbase) assert(!tmate_session.ev_dnsbase);
tmate_session.ev_dnsbase = evdns_base_new(tmate_session.ev_base, 1); tmate_session.ev_dnsbase = evdns_base_new(tmate_session.ev_base, 1);
if (!tmate_session.ev_dnsbase) if (!tmate_session.ev_dnsbase)
tmate_fatal("Cannot initialize the DNS lookup service"); tmate_fatal("Cannot initialize the DNS lookup service");
@ -191,12 +199,18 @@ static void on_reconnect_retry(__unused evutil_socket_t fd, __unused short what,
{ {
struct tmate_session *session = arg; struct tmate_session *session = arg;
assert(session->ev_connection_retry);
event_free(session->ev_connection_retry);
session->ev_connection_retry = NULL;
if (session->last_server_ip) { if (session->last_server_ip) {
/* /*
* We have a previous server ip. Let's try that again first, * We have a previous server ip. Let's try that again first,
* but then connect to any server if it fails again. * but then connect to any server if it fails again.
*/ */
(void)tmate_ssh_client_alloc(&tmate_session, session->last_server_ip); struct tmate_ssh_client *c = tmate_ssh_client_alloc(session,
session->last_server_ip);
connect_ssh_client(c);
free(session->last_server_ip); free(session->last_server_ip);
session->last_server_ip = NULL; session->last_server_ip = NULL;
} else { } else {
@ -214,18 +228,21 @@ void tmate_reconnect_session(struct tmate_session *session, const char *message)
*/ */
struct timeval tv = { .tv_sec = TMATE_RECONNECT_RETRY_TIMEOUT, .tv_usec = 0 }; struct timeval tv = { .tv_sec = TMATE_RECONNECT_RETRY_TIMEOUT, .tv_usec = 0 };
evtimer_assign(&session->ev_connection_retry, session->ev_base, if (session->ev_connection_retry)
on_reconnect_retry, session); return;
evtimer_add(&session->ev_connection_retry, &tv);
if (message) session->ev_connection_retry = evtimer_new(session->ev_base, on_reconnect_retry, session);
if (!session->ev_connection_retry)
tmate_fatal("out of memory");
evtimer_add(session->ev_connection_retry, &tv);
if (message && !tmate_foreground)
tmate_status_message("Reconnecting... (%s)", message); tmate_status_message("Reconnecting... (%s)", message);
else else
tmate_status_message("Reconnecting..."); tmate_status_message("Reconnecting...");
/* /*
* This says that we'll need to send a snapshot of the current state. * This says that we'll need to send a snapshot of the current state.
* Until we have persisted logs...
*/ */
session->reconnected = true; session->reconnected = true;
} }

View File

@ -210,8 +210,12 @@ static void init_conn_fd(struct tmate_ssh_client *client, bool tune_socket)
if (tune_socket) if (tune_socket)
tune_socket_opts(fd); tune_socket_opts(fd);
event_set(&client->ev_ssh, fd, EV_READ | EV_PERSIST, __on_ssh_client_event, client); assert(!client->ev_ssh);
event_add(&client->ev_ssh, NULL); client->ev_ssh = event_new(client->tmate_session->ev_base,
fd, EV_READ | EV_PERSIST, __on_ssh_client_event, client);
if (!client->ev_ssh)
tmate_fatal("out of memory");
event_add(client->ev_ssh, NULL);
client->has_init_conn_fd = true; client->has_init_conn_fd = true;
} }
@ -479,7 +483,8 @@ static void kill_ssh_client(struct tmate_ssh_client *client,
tmate_debug("SSH client killed (%s)", client->server_ip); tmate_debug("SSH client killed (%s)", client->server_ip);
if (client->has_init_conn_fd) { if (client->has_init_conn_fd) {
event_del(&client->ev_ssh); event_del(client->ev_ssh);
event_free(client->ev_ssh);
client->has_init_conn_fd = false; client->has_init_conn_fd = false;
} }
@ -506,12 +511,11 @@ static void kill_ssh_client(struct tmate_ssh_client *client,
free(client); free(client);
} }
static void connect_ssh_client(struct tmate_ssh_client *client) void connect_ssh_client(struct tmate_ssh_client *client)
{ {
if (!client->session) { assert(!client->session);
client->state = SSH_INIT; client->state = SSH_INIT;
on_ssh_client_event(client); on_ssh_client_event(client);
}
} }
static void ssh_log_function(int priority, const char *function, static void ssh_log_function(int priority, const char *function,
@ -522,9 +526,11 @@ static void ssh_log_function(int priority, const char *function,
struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session, struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session,
const char *server_ip) const char *server_ip)
{ {
struct tmate_ssh_client *client; struct tmate_ssh_client *client;
client = xmalloc(sizeof(*client)); client = xmalloc(sizeof(*client));
memset(client, 0, sizeof(*client));
ssh_set_log_callback(ssh_log_function); ssh_set_log_callback(ssh_log_function);
@ -544,7 +550,5 @@ struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session,
client->has_init_conn_fd = false; client->has_init_conn_fd = false;
connect_ssh_client(client);
return client; return client;
} }

View File

@ -22,7 +22,7 @@ struct tmate_encoder {
tmate_encoder_write_cb *ready_callback; tmate_encoder_write_cb *ready_callback;
void *userdata; void *userdata;
struct evbuffer *buffer; struct evbuffer *buffer;
struct event ev_buffer; struct event *ev_buffer;
bool ev_active; bool ev_active;
}; };
@ -142,10 +142,11 @@ struct tmate_ssh_client {
ssh_channel channel; ssh_channel channel;
bool has_init_conn_fd; bool has_init_conn_fd;
struct event ev_ssh; struct event *ev_ssh;
}; };
TAILQ_HEAD(tmate_ssh_clients, tmate_ssh_client); TAILQ_HEAD(tmate_ssh_clients, tmate_ssh_client);
extern void connect_ssh_client(struct tmate_ssh_client *client);
extern struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session, extern struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *session,
const char *server_ip); const char *server_ip);
@ -154,7 +155,7 @@ extern struct tmate_ssh_client *tmate_ssh_client_alloc(struct tmate_session *ses
struct tmate_session { struct tmate_session {
struct event_base *ev_base; struct event_base *ev_base;
struct evdns_base *ev_dnsbase; struct evdns_base *ev_dnsbase;
struct event ev_dns_retry; struct event *ev_dns_retry;
struct tmate_encoder encoder; struct tmate_encoder encoder;
struct tmate_decoder decoder; struct tmate_decoder decoder;
@ -175,7 +176,7 @@ struct tmate_session {
char *passphrase; char *passphrase;
bool reconnected; bool reconnected;
struct event ev_connection_retry; struct event *ev_connection_retry;
char *last_server_ip; char *last_server_ip;
char *reconnection_data; char *reconnection_data;
/* /*