5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2001 - 2003 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
21 #include "serverincludes.h"
22 #include "server_internal.h"
/* Forward declarations.  silc_server_protocol_backup_done is passed to
   silc_protocol_alloc() and silc_server_backup_connect_primary is passed to
   silc_server_backup_reconnect() before either definition appears. */
24 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);
25 static void silc_server_backup_connect_primary(SilcServer server,
26 SilcServerEntry server_entry,
30 /************************** Types and Definitions ***************************/
/* One slot of the `servers' table in struct SilcServerBackupStruct.  Code
   in this file also accesses `ip', `port' and `local' members of a slot;
   their declarations are not on the lines shown here. */
34 SilcServerEntry server; /* Backup router; NULL marks a free slot */
38 } SilcServerBackupEntry;
40 /* Holds IP address and port of the primary router that was replaced by a backup router. */
45 SilcServerEntry server; /* Backup router that replaced the primary */
/* Records of this type are allocated one at a time with silc_calloc() and
   stored by pointer in the `replaced' table; a NULL table slot is free. */
46 } SilcServerBackupReplaced;
/* Backup state reached through server->backup.  It is allocated by
   silc_server_backup_add() and silc_server_backup_replaced_add() and
   released by silc_server_backup_free(). */
49 struct SilcServerBackupStruct {
50 SilcServerBackupEntry *servers; /* Backup router table, grown with silc_realloc() */
51 SilcUInt32 servers_count; /* Number of slots in `servers', free slots included */
52 SilcServerBackupReplaced **replaced; /* Replaced-router records; NULL slots are free */
53 SilcUInt32 replaced_count; /* Number of slots in `replaced' */
/* One session of the backup resuming protocol.  Code in this file also
   uses `session' and `connected' members of this structure; their
   declarations are not on the lines shown here. */
59 SilcServerEntry server_entry; /* Server the session was opened with */
60 } SilcServerBackupProtocolSession;
62 /* Backup resuming protocol context */
/* Members used by this file but not declared on the lines shown here:
   `server', `type', `session' and `start'. */
65 SilcSocketConnection sock; /* Connection the protocol runs on; assigned from silc_socket_dup() */
68 SilcServerBackupProtocolSession *sessions; /* Session table, grown with silc_realloc() */
69 SilcUInt32 sessions_count; /* Number of entries in `sessions' */
71 unsigned int responder : 1; /* TRUE when created for a received START, FALSE when we initiate */
72 unsigned int received_failure : 1; /* Not referenced in the visible part of this file */
73 unsigned int timeout : 1; /* Not referenced in the visible part of this file */
74 } *SilcServerBackupProtocolContext;
77 /********************* Backup Configuration Routines ************************/
79 /* Adds the `backup_server' to be one of our backup routers. This can be
80 called multiple times to set multiple backup routers. The `ip' and `port'
81 are the IP and port that the `backup_server' will replace if the `ip'
82 becomes unresponsive. If `local' is TRUE then the `backup_server' is
83 in the local cell, if FALSE it is in some other cell. */
/* The backup context is allocated here when server->backup is still NULL.
   The table is first searched for `backup_server' itself, then for a free
   slot (NULL `server'), and only after that is the table grown by one. */
85 void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
86 const char *ip, int port, bool local)
93 if (!server->backup) {
94 server->backup = silc_calloc(1, sizeof(*server->backup));
99 /* See if already added */
100 for (i = 0; i < server->backup->servers_count; i++) {
101 if (server->backup->servers[i].server == backup_server)
105 SILC_LOG_DEBUG(("Backup router %s will replace %s",
106 ((SilcSocketConnection)backup_server->connection)->ip,
/* Reuse a slot that silc_server_backup_del() cleared earlier, if any. */
109 for (i = 0; i < server->backup->servers_count; i++) {
110 if (!server->backup->servers[i].server) {
111 server->backup->servers[i].server = backup_server;
112 server->backup->servers[i].local = local;
/* The port passes through SILC_SWAB_16() before it is stored;
   silc_server_backup_get() compares the stored value directly against
   server_id->port. */
113 server->backup->servers[i].port = SILC_SWAB_16(port);
/* Zero the address buffer before silc_net_addr2bin() writes into it. */
114 memset(server->backup->servers[i].ip.data, 0,
115 sizeof(server->backup->servers[i].ip.data));
116 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
117 sizeof(server->backup->servers[i].ip.data));
/* No free slot was found: append a new slot at index `servers_count'. */
122 i = server->backup->servers_count;
/* NOTE(review): no check of the silc_realloc() result is visible before
   the new slot is written -- confirm. */
123 server->backup->servers = silc_realloc(server->backup->servers,
124 sizeof(*server->backup->servers) *
126 server->backup->servers[i].server = backup_server;
127 server->backup->servers[i].local = local;
128 server->backup->servers[i].port = SILC_SWAB_16(port);
129 memset(server->backup->servers[i].ip.data, 0,
130 sizeof(server->backup->servers[i].ip.data));
131 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
132 sizeof(server->backup->servers[i].ip.data));
133 server->backup->servers_count++;
136 /* Returns the backup router for the IP and port in `server_id', or NULL
137 if no such backup router exists. */
139 SilcServerEntry silc_server_backup_get(SilcServer server,
140 SilcServerID *server_id)
/* A slot matches when it is in use (non-NULL `server'), its stored port
   equals server_id->port and its address bytes equal server_id->ip.data. */
147 for (i = 0; i < server->backup->servers_count; i++) {
148 if (server->backup->servers[i].server &&
149 server->backup->servers[i].port == server_id->port &&
150 !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
151 sizeof(server_id->ip.data))) {
152 SILC_LOG_DEBUG(("Found backup router %s for %s",
153 server->backup->servers[i].server->server_name,
154 silc_id_render(server_id, SILC_ID_SERVER)));
155 return server->backup->servers[i].server;
162 /* Deletes the backup server `server_entry'. */
/* The matching slot is cleared rather than removed: its `server' becomes
   NULL and its address bytes are zeroed.  `servers_count' is not changed
   on the lines shown, so silc_server_backup_add() can reuse the slot. */
164 void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
171 for (i = 0; i < server->backup->servers_count; i++) {
172 if (server->backup->servers[i].server == server_entry) {
173 SILC_LOG_DEBUG(("Removing %s as backup router",
174 silc_id_render(server->backup->servers[i].server->id,
176 server->backup->servers[i].server = NULL;
177 memset(server->backup->servers[i].ip.data, 0,
178 sizeof(server->backup->servers[i].ip.data));
183 /* Frees all data allocated for backup routers. Call this after deleting
184 all backup routers and when no more routers will be added, for example
185 when shutting down the server. */
187 void silc_server_backup_free(SilcServer server)
194 /* Delete existing servers if caller didn't do it */
195 for (i = 0; i < server->backup->servers_count; i++) {
196 if (server->backup->servers[i].server)
197 silc_server_backup_del(server, server->backup->servers[i].server);
/* Release the router table and the context, then clear the pointer so
   that later `!server->backup' tests see no backup context. */
/* NOTE(review): no release of the `replaced' table or its records is
   visible on these lines -- confirm whether that happens elsewhere. */
200 silc_free(server->backup->servers);
201 silc_free(server->backup);
202 server->backup = NULL;
205 /* Marks the IP address and port from the `server_id' as being replaced
206 by the backup router indicated by `server_entry'. If the router connects at
207 a later time we can check whether it has been replaced by a backup router. */
/* Records that the router identified by `server_id' has been replaced by
   the backup router `server_entry'.  A new record is allocated, filled in
   and stored in the first free slot of the `replaced' table, or appended
   when no slot is free. */
210 void silc_server_backup_replaced_add(SilcServer server,
211 SilcServerID *server_id,
212 SilcServerEntry server_entry)
/* NOTE(review): the statement below ends in a stray second semicolon, and
   no check of the allocation result is visible before `r' is written. */
215 SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));;
218 server->backup = silc_calloc(1, sizeof(*server->backup));
/* The first use creates a one-slot table; the search loop below then finds
   that slot empty. */
219 if (!server->backup->replaced) {
220 server->backup->replaced =
221 silc_calloc(1, sizeof(*server->backup->replaced));
222 server->backup->replaced_count = 1;
225 SILC_LOG_DEBUG(("Replacing router %s with %s",
226 silc_id_render(server_id, SILC_ID_SERVER),
227 server_entry->server_name));
/* Only the address and the replacing router are stored on the lines
   shown; no copy of the port is visible. */
229 memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip));
230 r->server = server_entry;
/* Prefer a slot cleared by silc_server_backup_replaced_del(). */
232 for (i = 0; i < server->backup->replaced_count; i++) {
233 if (!server->backup->replaced[i]) {
234 server->backup->replaced[i] = r;
/* No free slot: grow the table and store the record at the end. */
239 i = server->backup->replaced_count;
240 server->backup->replaced = silc_realloc(server->backup->replaced,
241 sizeof(*server->backup->replaced) *
243 server->backup->replaced[i] = r;
244 server->backup->replaced_count++;
247 /* Checks whether the IP address and port from the `server_id' have been
248 replaced by a backup router. If so, this returns TRUE and stores the
249 backup router entry in the `server_entry' pointer if non-NULL. Returns
250 FALSE if the router is not replaced by a backup router. */
252 bool silc_server_backup_replaced_get(SilcServer server,
253 SilcServerID *server_id,
254 SilcServerEntry *server_entry)
/* Nothing can match when there is no backup context or no table. */
258 if (!server->backup || !server->backup->replaced)
261 for (i = 0; i < server->backup->replaced_count; i++) {
/* Free slots are NULL and must not be dereferenced. */
262 if (!server->backup->replaced[i])
/* The comparison shown here covers the address bytes only. */
264 if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
265 sizeof(server_id->ip.data))) {
267 *server_entry = server->backup->replaced[i]->server;
268 SILC_LOG_DEBUG(("Router %s is replaced by %s",
269 silc_id_render(server_id, SILC_ID_SERVER),
270 server->backup->replaced[i]->server->server_name));
275 SILC_LOG_DEBUG(("Router %s is not replaced by backup router",
276 silc_id_render(server_id, SILC_ID_SERVER)));
280 /* Deletes the replaced-host record whose backup router is `server_entry'. */
282 void silc_server_backup_replaced_del(SilcServer server,
283 SilcServerEntry server_entry)
287 if (!server->backup || !server->backup->replaced)
290 for (i = 0; i < server->backup->replaced_count; i++) {
291 if (!server->backup->replaced[i])
/* Free the record and leave a NULL slot that
   silc_server_backup_replaced_add() can reuse. */
293 if (server->backup->replaced[i]->server == server_entry) {
294 silc_free(server->backup->replaced[i]);
295 server->backup->replaced[i] = NULL;
301 /* Broadcast the received packet indicated by `packet' to all of our backup
302 routers. All router wide information is passed using broadcast packets.
303 That is why all backup routers need to get this data too. It is expected
304 that the caller already knows that the `packet' is a broadcast packet. */
306 void silc_server_backup_broadcast(SilcServer server,
307 SilcSocketConnection sender,
308 SilcPacketContext *packet)
310 SilcServerEntry backup;
311 SilcSocketConnection sock;
/* `p' is declared const but is written through casts in the calls below. */
313 const SilcBufferStruct p;
314 SilcIDListData idata;
/* Only a router that has a backup context broadcasts. */
317 if (!server->backup || server->server_type != SILC_ROUTER)
320 SILC_LOG_DEBUG(("Broadcasting received packet to backup routers"));
/* Push the packet buffer by the distance between its `data' and `head'
   pointers (presumably to expose the packet from its start again --
   TODO confirm against the SilcBuffer API). */
322 buffer = packet->buffer;
323 silc_buffer_push(buffer, buffer->data - buffer->head);
325 for (i = 0; i < server->backup->servers_count; i++) {
326 backup = server->backup->servers[i].server;
/* Filter on free slots, the connection the packet came from, backups
   that are not local, and our own entry. */
328 if (!backup || backup->connection == sender ||
329 server->backup->servers[i].local == FALSE)
331 if (server->backup->servers[i].server == server->id_entry)
334 idata = (SilcIDListData)backup;
335 sock = backup->connection;
337 if (!silc_packet_send_prepare(sock, 0, 0, buffer->len, idata->hmac_send,
338 (const SilcBuffer)&p)) {
339 SILC_LOG_ERROR(("Cannot send packet"));
/* Copy the packet into the prepared buffer and encrypt it with this
   backup router's send key; the send sequence number is incremented. */
342 silc_buffer_put((SilcBuffer)&p, buffer->data, buffer->len);
343 silc_packet_encrypt(idata->send_key, idata->hmac_send, idata->psn_send++,
344 (SilcBuffer)&p, p.len);
346 SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", p.len), p.data, p.len);
348 /* Now actually send the packet */
349 silc_server_packet_send_real(server, sock, FALSE);
351 /* Check for mandatory rekey */
/* NOTE(review): the threshold is tested on the backup router's `idata',
   but the rekey task is scheduled with `sender' (its sock and context),
   not with `sock' -- confirm whether `sock' was intended. */
352 if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD)
353 silc_schedule_task_add(server->schedule, sender->sock,
354 silc_server_rekey_callback, sender, 0, 1,
355 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
359 /* A generic routine to send data to all backup routers. If the `sender'
360 is provided it will indicate the original sender of the packet and the
361 packet won't be resent to that entity. The `data' is the data that will
362 be assembled to packet context before sending. The packet will be
363 encrypted by this function. If the `force_send' is TRUE the data is sent
364 immediately and not put to queue. If `local' is TRUE then the packet
365 will be sent only to local backup routers inside the cell. If FALSE the
366 packet can go from one cell to the other. This function has no effect
367 if there are no backup routers. */
369 void silc_server_backup_send(SilcServer server,
370 SilcServerEntry sender,
372 SilcPacketFlags flags,
378 SilcServerEntry backup;
379 SilcSocketConnection sock;
/* Only a router that has a backup context sends to backup routers. */
382 if (!server->backup || server->server_type != SILC_ROUTER)
385 for (i = 0; i < server->backup->servers_count; i++) {
386 backup = server->backup->servers[i].server;
/* Filter on free slots and the original sender, on non-local backups
   when `local' is set, and on our own entry. */
387 if (!backup || sender == backup)
389 if (local && server->backup->servers[i].local == FALSE)
391 if (server->backup->servers[i].server == server->id_entry)
394 sock = backup->connection;
396 SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
397 silc_get_packet_name(type), sock->hostname, sock->ip));
399 silc_server_packet_send(server, backup->connection, type, flags,
400 data, data_len, force_send);
404 /* Same as silc_server_backup_send but sets a specific Destination ID to
405 the packet. The Destination ID is indicated by the `dst_id' and the
406 ID type `dst_id_type'. For example, packets destined to channels must
407 be sent using this function. */
409 void silc_server_backup_send_dest(SilcServer server,
410 SilcServerEntry sender,
412 SilcPacketFlags flags,
414 SilcIdType dst_id_type,
420 SilcServerEntry backup;
421 SilcSocketConnection sock;
/* Same guard and per-slot filters as in silc_server_backup_send(). */
424 if (!server->backup || server->server_type != SILC_ROUTER)
427 for (i = 0; i < server->backup->servers_count; i++) {
428 backup = server->backup->servers[i].server;
429 if (!backup || sender == backup)
431 if (local && server->backup->servers[i].local == FALSE)
433 if (server->backup->servers[i].server == server->id_entry)
436 sock = backup->connection;
438 SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
439 silc_get_packet_name(type), sock->hostname, sock->ip));
/* The only difference from silc_server_backup_send(): the destination
   ID and its type are passed along. */
441 silc_server_packet_send_dest(server, backup->connection, type, flags,
442 dst_id, dst_id_type, data, data_len,
447 /* Send the START_USE indication to remote connection. If `failure' is
448 TRUE then this sends SILC_PACKET_FAILURE. Otherwise it sends
449 SILC_PACKET_RESUME_ROUTER. */
451 void silc_server_backup_send_start_use(SilcServer server,
452 SilcSocketConnection sock,
455 unsigned char data[4];
457 SILC_LOG_DEBUG(("Sending START_USE (%s) to %s",
458 failure ? "failure" : "success", sock->ip));
/* Failure form: START_USE written into `data' with SILC_PUT32_MSB and
   sent in a FAILURE packet. */
461 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START_USE, data);
462 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
/* Success form: START_USE in the first byte of a RESUME_ROUTER packet. */
465 data[0] = SILC_SERVER_BACKUP_START_USE;
467 silc_server_packet_send(server, sock,
468 SILC_PACKET_RESUME_ROUTER, 0,
473 /* Send the REPLACED indication to remote router. This is sent by the
474 primary router (remote router) of the primary router that came back
475 online. This is not sent by backup router or any other server. */
477 void silc_server_backup_send_replaced(SilcServer server,
478 SilcSocketConnection sock)
480 unsigned char data[4];
/* NOTE(review): the format string below contains two %s conversions but
   only one argument (sock->ip) is supplied. */
482 SILC_LOG_DEBUG(("Sending REPLACED (%s) to %s", sock->ip));
/* REPLACED goes in the first byte of a RESUME_ROUTER packet. */
484 data[0] = SILC_SERVER_BACKUP_REPLACED;
486 silc_server_packet_send(server, sock,
487 SILC_PACKET_RESUME_ROUTER, 0,
492 /************************ Backup Resuming Protocol **************************/
494 /* Timeout callback for protocol */
/* The task context is the SilcProtocol that was registered together with
   this callback; `app_context' is the server. */
496 SILC_TASK_CALLBACK(silc_server_backup_timeout)
498 SilcProtocol protocol = context;
/* `ctx' is not used on the lines shown here. */
499 SilcServerBackupProtocolContext ctx = protocol->context;
500 SilcServer server = app_context;
502 SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
/* Cancel the protocol, put it in the error state and run its final
   callback. */
504 silc_protocol_cancel(protocol, server->schedule);
505 protocol->state = SILC_PROTOCOL_STATE_ERROR;
506 silc_protocol_execute_final(protocol, server->schedule);
509 /* Callback to start the protocol as responder */
/* The task context is the protocol context that
   silc_server_backup_resume_router() created for a received START. */
511 SILC_TASK_CALLBACK(silc_server_backup_responder_start)
513 SilcServerBackupProtocolContext proto_ctx = context;
514 SilcSocketConnection sock = proto_ctx->sock;
515 SilcServer server = app_context;
517 /* If other protocol is executing at the same time, start with timeout. */
/* In that case this same callback is scheduled again instead of starting
   now. */
518 if (sock->protocol) {
519 SILC_LOG_DEBUG(("Other protocol is executing, wait for it to finish"));
520 silc_schedule_task_add(server->schedule, sock->sock,
521 silc_server_backup_responder_start,
523 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
527 /* Run the backup resuming protocol */
/* silc_server_protocol_backup_done is registered as the final callback.
   A silc_server_backup_timeout task is then added for the new protocol
   (the `30, 0' arguments are presumably a 30 second timeout -- TODO
   confirm against silc_schedule_task_add). */
528 silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
529 &sock->protocol, proto_ctx,
530 silc_server_protocol_backup_done);
531 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
532 silc_schedule_task_add(server->schedule, sock->sock,
533 silc_server_backup_timeout,
534 sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
535 SILC_TASK_PRI_NORMAL);
538 /* Callback to send START_USE to the backup router to check whether we are still using the backup router as our primary. */
/* Task callback whose context is a socket connection; `app_context' is
   the server. */
541 SILC_TASK_CALLBACK(silc_server_backup_check_status)
543 SilcSocketConnection sock = context;
544 SilcServer server = app_context;
546 /* Check whether we are still using backup */
/* NOTE(review): what happens to the socket reference when
   server->backup_primary is not set cannot be seen on these lines --
   confirm it is released on that path as well. */
547 if (!server->backup_primary)
/* Send a success-form START_USE, then drop the reference on the socket. */
550 silc_server_backup_send_start_use(server, sock, FALSE);
551 silc_socket_free(sock); /* unref */
/* Context kept while a PING to the primary router is pending; it is
   created in silc_server_backup_resume_router() and consumed by
   silc_server_backup_ping_reply().  A `server' member is also used by
   that callback; its declaration is not on the lines shown here. */
556 SilcSocketConnection sock; /* Assigned from silc_socket_dup() of the sending connection */
557 SilcPacketContext *packet; /* Assigned from silc_packet_context_dup() of the RESUME_ROUTER packet */
558 } *SilcServerBackupPing;
560 /* PING command reply callback */
/* `context' is the SilcServerBackupPing registered with
   silc_server_command_pending_timed(); `reply' is the command reply
   context. */
562 void silc_server_backup_ping_reply(void *context, void *reply)
564 SilcServerBackupPing pc = context;
565 SilcServerCommandReplyContext cmdr = reply;
567 if (cmdr && !silc_command_get_status(cmdr->payload, NULL, NULL)) {
568 /* Timeout error occurred, the primary is really down. */
569 SilcSocketConnection primary = SILC_PRIMARY_ROUTE(pc->server);
571 SILC_LOG_DEBUG(("PING timeout, primary is down"));
/* Release the primary route's user data if it has any, mark the
   connection as disconnecting and close it. */
574 if (primary->user_data)
575 silc_server_free_sock_user_data(pc->server, primary, NULL);
576 SILC_SET_DISCONNECTING(primary);
577 silc_server_close_connection(pc->server, primary);
580 /* Reprocess the RESUME_ROUTER packet */
581 silc_server_backup_resume_router(pc->server, pc->sock, pc->packet);
583 /* The primary is not down, refuse to serve the server as primary */
584 SILC_LOG_DEBUG(("PING received, primary is up"));
585 silc_server_backup_send_start_use(pc->server, pc->sock, TRUE);
/* Release the socket and packet held in the ping context. */
588 silc_socket_free(pc->sock);
589 silc_packet_context_free(pc->packet);
593 /* Processes incoming RESUME_ROUTER packet. This can give the packet
594 for processing to the protocol handler or allocate new protocol if
595 start command is received. */
/* The payload begins with a one-byte type followed by a one-byte session
   ID, both read with silc_buffer_unformat().  The type values tested on
   the lines below are START_USE, START, REPLACED and RESUMED. */
597 void silc_server_backup_resume_router(SilcServer server,
598 SilcSocketConnection sock,
599 SilcPacketContext *packet)
601 SilcUInt8 type, session;
602 SilcServerBackupProtocolContext ctx;
603 SilcIDListData idata;
606 SILC_LOG_DEBUG(("Received RESUME_ROUTER packet"));
/* Connections of type client or unknown are logged as senders of a bad
   packet. */
608 if (sock->type == SILC_SOCKET_TYPE_CLIENT ||
609 sock->type == SILC_SOCKET_TYPE_UNKNOWN) {
610 SILC_LOG_DEBUG(("Bad packet received"));
614 idata = (SilcIDListData)sock->user_data;
616 ret = silc_buffer_unformat(packet->buffer,
617 SILC_STR_UI_CHAR(&type),
618 SILC_STR_UI_CHAR(&session),
621 SILC_LOG_ERROR(("Malformed resume router packet received"));
625 /* Check whether this packet is used to tell us that server will start
626 using us as primary router. */
627 if (type == SILC_SERVER_BACKUP_START_USE) {
629 SilcServerBackupPing pc;
631 /* If we are normal server then backup router has sent us back
632 this reply and we use the backup as primary router now. */
633 if (server->server_type == SILC_SERVER) {
634 /* Nothing to do here actually, since we have switched already. */
635 SILC_LOG_DEBUG(("Received successful START_USE from backup router"));
639 /* Backup router following. */
641 /* If we are marked as router then the primary is down and we send
642 success START_USE back to the server. */
643 if (server->server_type == SILC_ROUTER) {
644 SILC_LOG_DEBUG(("Sending success START_USE back to %s", sock->ip));
645 silc_server_backup_send_start_use(server, sock, FALSE);
649 /* We have just lost primary, send success START_USE back */
650 if (server->standalone) {
651 SILC_LOG_DEBUG(("We are stanalone, sending success START_USE back to %s",
653 silc_server_backup_send_start_use(server, sock, FALSE);
657 /* We are backup router. This server claims that our primary is down.
658 We will check this ourselves by sending PING command to the primary. */
659 SILC_LOG_DEBUG(("Sending PING to detect status of primary router"));
/* The PING carries the ID payload of server->router and is sent on the
   primary route with a freshly incremented command identifier. */
660 idp = silc_id_payload_encode(server->router->id, SILC_ID_SERVER);
661 silc_server_send_command(server, SILC_PRIMARY_ROUTE(server),
662 SILC_COMMAND_PING, ++server->cmd_ident, 1,
663 1, idp->data, idp->len);
664 silc_buffer_free(idp);
666 /* Reprocess this packet after received reply from router */
/* The ping context holds its own socket and packet, obtained with
   silc_socket_dup() and silc_packet_context_dup();
   silc_server_backup_ping_reply() releases them. */
667 pc = silc_calloc(1, sizeof(*pc));
669 pc->sock = silc_socket_dup(sock);
670 pc->packet = silc_packet_context_dup(packet);
671 silc_server_command_pending_timed(server, SILC_COMMAND_PING,
673 silc_server_backup_ping_reply, pc, 15);
678 /* Start the resuming protocol if requested. */
679 if (type == SILC_SERVER_BACKUP_START) {
680 /* We have received a start for resuming protocol. We are either
681 primary router that came back online or normal server. */
682 SilcServerBackupProtocolContext proto_ctx;
684 /* If backup had closed the connection earlier we won't allow resuming
685 since we (primary router) have never gone away. */
686 if (server->server_type == SILC_ROUTER && !server->backup_router &&
687 server->backup_closed) {
688 unsigned char data[4];
689 SILC_LOG_DEBUG(("Backup resuming not allowed since we are still "
/* The refusal is a FAILURE packet carrying the START type; the
   backup_closed flag is cleared afterwards. */
691 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
692 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
694 server->backup_closed = FALSE;
/* Build the responder-side protocol context.  It holds a socket obtained
   from silc_socket_dup() and records the received type and session. */
698 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
699 proto_ctx->server = server;
700 proto_ctx->sock = silc_socket_dup(sock);
701 proto_ctx->responder = TRUE;
702 proto_ctx->type = type;
703 proto_ctx->session = session;
704 proto_ctx->start = time(0);
706 SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
707 SILC_LOG_INFO(("Starting backup resuming protocol"));
709 /* Start protocol immediately */
710 silc_schedule_task_add(server->schedule, sock->sock,
711 silc_server_backup_responder_start,
713 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
718 /* If we are router and the packet is coming from our primary router
719 then it means we have been replaced by a backup router in our cell. */
720 if (type == SILC_SERVER_BACKUP_REPLACED &&
721 server->server_type == SILC_ROUTER &&
722 sock->type == SILC_SOCKET_TYPE_ROUTER &&
723 SILC_PRIMARY_ROUTE(server) == sock) {
724 /* We have been replaced by a backup router in our cell. We must
725 mark our primary router connection disabled since we are not allowed
726 to use it at this moment. */
727 SILC_LOG_INFO(("We are replaced by an backup router in this cell, will "
728 "wait until backup resuming protocol is executed"));
729 idata->status |= SILC_IDLIST_STATUS_DISABLED;
734 /* Activate the shared protocol context for this socket connection
736 if (type == SILC_SERVER_BACKUP_RESUMED &&
737 sock->type == SILC_SOCKET_TYPE_ROUTER && !sock->protocol &&
738 idata->status & SILC_IDLIST_STATUS_DISABLED) {
739 SilcServerEntry backup_router;
741 if (silc_server_backup_replaced_get(server, ((SilcServerEntry)idata)->id,
743 SilcSocketConnection bsock =
744 (SilcSocketConnection)backup_router->connection;
745 if (bsock->protocol && bsock->protocol->protocol &&
746 bsock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) {
747 sock->protocol = bsock->protocol;
748 ctx = sock->protocol->context;
750 silc_socket_free(ctx->sock); /* unref */
751 ctx->sock = silc_socket_dup(sock);
757 /* Call the resuming protocol if the protocol is active. */
758 if (SILC_SERVER_IS_BACKUP(sock)) {
759 ctx = sock->protocol->context;
/* Look the received session ID up among the sessions known to the
   protocol context; on a match it becomes the context's current session
   and the protocol is executed. */
762 for (i = 0; i < ctx->sessions_count; i++) {
763 if (session == ctx->sessions[i].session) {
764 ctx->session = session;
765 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
770 /* If RESUMED received the session ID is zero, execute the protocol. */
771 if (type == SILC_SERVER_BACKUP_RESUMED) {
772 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
776 SILC_LOG_ERROR(("Unknown backup resuming session %d", session));
781 /* Timeout task callback to connect to remote router */
/* The task context is the SilcServerConnection prepared by
   silc_server_backup_reconnect(); `app_context' is the server. */
783 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
785 SilcServer server = app_context;
786 SilcServerConnection sconn = (SilcServerConnection)context;
788 const char *server_ip;
790 SILC_LOG_DEBUG(("Connecting to router %s:%d", sconn->remote_host,
791 sconn->remote_port));
793 /* Connect to remote host */
/* `server_ip' is the configured primary server IP, or NULL when the
   configuration has no primary entry. */
794 server_ip = server->config->server_info->primary == NULL ? NULL :
795 server->config->server_info->primary->server_ip;
796 sock = silc_net_create_connection(server_ip, sconn->remote_port,
/* A normal server counts its attempts; once the count exceeds 3 the
   remote host string is freed. */
799 if (server->server_type == SILC_SERVER) {
800 sconn->retry_count++;
801 if (sconn->retry_count > 3) {
802 silc_free(sconn->remote_host);
/* Schedule this same callback again with the same context (the `10, 0'
   arguments are presumably a 10 second delay -- TODO confirm against
   silc_schedule_task_add). */
807 silc_schedule_task_add(server->schedule, 0,
808 silc_server_backup_connect_to_router,
809 context, 10, 0, SILC_TASK_TIMEOUT,
810 SILC_TASK_PRI_NORMAL);
814 /* Continue with key exchange protocol */
815 silc_server_start_key_exchange(server, sconn, sock);
818 /* Constantly tries to reconnect to a primary router indicated by the
819 `ip' and `port'. The `callback' will be called when the
820 connection is created. */
822 void silc_server_backup_reconnect(SilcServer server,
823 const char *ip, SilcUInt16 port,
824 SilcServerConnectRouterCallback callback,
827 SilcServerConnection sconn;
829 SILC_LOG_INFO(("Attempting to reconnect to primary router"));
/* Build the connection context: a private copy of `ip', the port, the
   caller's callback and its context, with the retry counter at zero. */
831 sconn = silc_calloc(1, sizeof(*sconn));
832 sconn->remote_host = strdup(ip);
833 sconn->remote_port = port;
834 sconn->callback = callback;
835 sconn->callback_context = context;
836 sconn->no_reconnect = TRUE;
837 sconn->retry_count = 0;
/* The connection attempt runs from the scheduler in
   silc_server_backup_connect_to_router(). */
838 silc_schedule_task_add(server->schedule, 0,
839 silc_server_backup_connect_to_router,
840 sconn, 1, 0, SILC_TASK_TIMEOUT,
841 SILC_TASK_PRI_NORMAL);
844 /* Task that is called after backup router has connected back to
845 primary router and we are starting the resuming protocol */
/* The task context is the initiator-side protocol context built by
   silc_server_backup_connected(). */
847 SILC_TASK_CALLBACK(silc_server_backup_connected_later)
849 SilcServerBackupProtocolContext proto_ctx =
850 (SilcServerBackupProtocolContext)context;
851 SilcServer server = proto_ctx->server;
852 SilcSocketConnection sock = proto_ctx->sock;
854 /* If running other protocol already run this one a bit later. */
855 if (sock->protocol) {
856 SILC_LOG_DEBUG(("Other protocol is running, wait for it to finish"));
857 silc_schedule_task_add(server->schedule, 0,
858 silc_server_backup_connected_later,
861 SILC_TASK_PRI_NORMAL);
865 SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
866 SILC_LOG_INFO(("Starting backup resuming protocol"));
868 /* Run the backup resuming protocol */
/* silc_server_protocol_backup_done is registered as the final callback,
   and a silc_server_backup_timeout task is added for the new protocol. */
869 silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
870 &sock->protocol, proto_ctx,
871 silc_server_protocol_backup_done);
872 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
874 silc_schedule_task_add(server->schedule, sock->sock,
875 silc_server_backup_timeout,
876 sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
877 SILC_TASK_PRI_NORMAL);
880 /* Called when we've established connection back to our primary router
881 when we've been acting as backup router and have replaced the primary
882 router in the cell. This function will start the backup resuming protocol. */
884 void silc_server_backup_connected(SilcServer server,
885 SilcServerEntry server_entry,
888 SilcServerBackupProtocolContext proto_ctx;
889 SilcSocketConnection sock;
/* Retry path: when no router socket for the configured primary host and
   port is found, silc_server_backup_reconnect() is called with this
   function as its callback. */
893 SilcServerConfigRouter *primary;
894 primary = silc_server_config_get_primary_router(server);
896 if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
897 primary->host, primary->port))
898 silc_server_backup_reconnect(server,
899 primary->host, primary->port,
900 silc_server_backup_connected,
/* Build the initiator-side protocol context (responder is FALSE, type is
   START).  It holds a socket obtained from silc_socket_dup() of the
   primary router connection. */
906 sock = (SilcSocketConnection)server_entry->connection;
907 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
908 proto_ctx->server = server;
909 proto_ctx->sock = silc_socket_dup(sock);
910 proto_ctx->responder = FALSE;
911 proto_ctx->type = SILC_SERVER_BACKUP_START;
912 proto_ctx->start = time(0);
914 /* Start through scheduler */
915 silc_schedule_task_add(server->schedule, 0,
916 silc_server_backup_connected_later,
919 SILC_TASK_PRI_NORMAL);
922 /* Called when a normal server has connected to its primary router after
923 the backup router has sent the START packet in the resuming protocol. We will
924 move the protocol context from the backup router connection to the primary router connection. */
/* Connect callback handed to silc_server_backup_reconnect().  `context' is
   the backup router connection; the caller in this file passes it as a
   silc_socket_dup() result. */
927 static void silc_server_backup_connect_primary(SilcServer server,
928 SilcServerEntry server_entry,
931 SilcSocketConnection backup_router = (SilcSocketConnection)context;
932 SilcServerBackupProtocolContext ctx;
933 SilcSocketConnection sock;
934 SilcIDListData idata;
935 unsigned char data[2];
/* If the backup router connection is going away or already gone, drop
   the reference on it. */
937 if (SILC_IS_DISCONNECTING(backup_router) ||
938 SILC_IS_DISCONNECTED(backup_router)) {
939 silc_socket_free(backup_router);
/* Retry path: when no router socket for the configured primary host and
   port is found, silc_server_backup_reconnect() is called with this
   function as its callback. */
945 SilcServerConfigRouter *primary;
946 primary = silc_server_config_get_primary_router(server);
948 if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
949 primary->host, primary->port))
950 silc_server_backup_reconnect(server,
951 primary->host, primary->port,
952 silc_server_backup_connect_primary,
/* NOTE(review): `backup_router' is still dereferenced after the
   silc_socket_free() call below -- confirm another reference keeps the
   socket alive at this point. */
958 silc_socket_free(backup_router);
960 if (!backup_router->protocol)
962 if (!server_entry->connection)
965 ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context;
966 sock = (SilcSocketConnection)server_entry->connection;
967 idata = (SilcIDListData)server_entry;
969 SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
970 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
973 /* Send the CONNECTED packet back to the backup router. */
974 data[0] = SILC_SERVER_BACKUP_CONNECTED;
975 data[1] = ctx->session;
976 silc_server_packet_send(server, backup_router,
977 SILC_PACKET_RESUME_ROUTER, 0, data, 2, FALSE);
979 /* The primary connection is disabled until it sends the RESUMED packet
981 idata->status |= SILC_IDLIST_STATUS_DISABLED;
983 /* Move this protocol context from this backup router connection to
984 the primary router connection since it will send the subsequent
985 packets in this protocol. We don't talk with backup router
987 sock->protocol = backup_router->protocol;
989 silc_socket_free(ctx->sock); /* unref */
/* The context now holds the primary router connection, and the backup
   router connection no longer points at the protocol. */
990 ctx->sock = silc_socket_dup(server_entry->connection);
991 backup_router->protocol = NULL;
994 /* Timeout callback used by the backup router to send the ENDING packet
995 to primary router to indicate that it can now resume as being primary
996 router. All CONNECTED packets have been received when we reach this. */
/* The task context is the SilcProtocol; the server is taken from the
   protocol context. */
998 SILC_TASK_CALLBACK(silc_server_backup_send_resumed)
1000 SilcProtocol protocol = (SilcProtocol)context;
1001 SilcServerBackupProtocolContext ctx = protocol->context;
1002 SilcServer server = ctx->server;
1003 unsigned char data[2];
1006 SILC_LOG_DEBUG(("Start"));
/* Select the session that was opened with the server entry behind
   ctx->sock. */
1008 for (i = 0; i < ctx->sessions_count; i++)
1009 if (ctx->sessions[i].server_entry == ctx->sock->user_data)
1010 ctx->session = ctx->sessions[i].session;
1012 /* We've received all the CONNECTED packets and now we'll send the
1013 ENDING packet to the new primary router. */
1014 data[0] = SILC_SERVER_BACKUP_ENDING;
1015 data[1] = ctx->session;
1016 silc_server_packet_send(server, ctx->sock, SILC_PACKET_RESUME_ROUTER, 0,
1017 data, sizeof(data), FALSE);
1019 /* The protocol will go to END state. */
1020 protocol->state = SILC_PROTOCOL_STATE_END;
1023 /* Backup resuming protocol. This protocol is executed when the primary
1024 router wants to resume its position as being primary router. */
1026 SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
1028 SilcProtocol protocol = (SilcProtocol)context;
1029 SilcServerBackupProtocolContext ctx = protocol->context;
1030 SilcServer server = ctx->server;
1031 SilcServerEntry server_entry;
1032 SilcSocketConnection sock = NULL;
1033 unsigned char data[2];
1036 if (protocol->state == SILC_PROTOCOL_STATE_UNKNOWN)
1037 protocol->state = SILC_PROTOCOL_STATE_START;
1039 switch(protocol->state) {
1040 case SILC_PROTOCOL_STATE_START:
1041 if (ctx->responder == FALSE) {
1043 * Initiator (backup router)
1046 /* Send the START packet to primary router and normal servers. The
1047 packet will indicate to the primary router that it has been replaced
1048 by us. For normal servers it means that we will be resigning as
1049 being primary router shortly. */
1050 for (i = 0; i < server->config->param.connections_max; i++) {
1051 sock = server->sockets[i];
1052 if (!sock || !sock->user_data ||
1053 sock->user_data == server->id_entry ||
1054 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1055 sock->type != SILC_SOCKET_TYPE_SERVER))
1058 server_entry = sock->user_data;
1059 if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
1062 ctx->sessions = silc_realloc(ctx->sessions,
1063 sizeof(*ctx->sessions) *
1064 (ctx->sessions_count + 1));
1065 ctx->sessions[ctx->sessions_count].session = ctx->sessions_count;
1066 ctx->sessions[ctx->sessions_count].connected = FALSE;
1067 ctx->sessions[ctx->sessions_count].server_entry = server_entry;
1069 SILC_LOG_DEBUG(("Sending START to %s (session %d)",
1070 server_entry->server_name, ctx->sessions_count));
1071 SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
1072 server_entry->server_name, ctx->sessions_count));
1074 /* This connection is performing this protocol too now */
1075 sock->protocol = protocol;
1077 data[0] = SILC_SERVER_BACKUP_START;
1078 data[1] = ctx->sessions_count;
1079 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1080 data, sizeof(data), FALSE);
1081 ctx->sessions_count++;
1084 /* Announce data to the new primary to be. */
1085 silc_server_announce_servers(server, TRUE, 0, ctx->sock);
1086 silc_server_announce_clients(server, 0, ctx->sock);
1087 silc_server_announce_channels(server, 0, ctx->sock);
1093 * Responder (all servers and routers)
1095 SilcServerConfigRouter *primary;
1097 /* We should have received START packet */
1098 if (ctx->type != SILC_SERVER_BACKUP_START) {
1099 SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
1103 /* Connect to the primary router that was down that is now supposed
1104 to be back online. We send the CONNECTED packet after we've
1105 established the connection to the primary router. */
1106 primary = silc_server_config_get_primary_router(server);
1107 if (primary && server->backup_primary &&
1108 !silc_server_num_sockets_by_remote(server,
1109 silc_net_is_ip(primary->host) ?
1110 primary->host : NULL,
1111 silc_net_is_ip(primary->host) ?
1112 NULL : primary->host,
1114 SILC_SOCKET_TYPE_ROUTER)) {
1115 SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
1117 silc_server_backup_reconnect(server,
1118 primary->host, primary->port,
1119 silc_server_backup_connect_primary,
1120 silc_socket_dup(ctx->sock));
1122 /* Nowhere to connect just return the CONNECTED packet */
1123 SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
1125 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1128 /* Send the CONNECTED packet back to the backup router. */
1129 data[0] = SILC_SERVER_BACKUP_CONNECTED;
1130 data[1] = ctx->session;
1131 silc_server_packet_send(server, ctx->sock,
1132 SILC_PACKET_RESUME_ROUTER, 0,
1133 data, sizeof(data), FALSE);
1136 /* Add this resuming session */
1137 ctx->sessions = silc_realloc(ctx->sessions,
1138 sizeof(*ctx->sessions) *
1139 (ctx->sessions_count + 1));
1140 ctx->sessions[ctx->sessions_count].session = ctx->session;
1141 ctx->sessions_count++;
1143 /* Normal server goes directly to the END state. */
1144 if (server->server_type == SILC_ROUTER &&
1146 server->router->data.status & SILC_IDLIST_STATUS_DISABLED))
1149 protocol->state = SILC_PROTOCOL_STATE_END;
1154 if (ctx->responder == FALSE) {
1156 * Initiator (backup router)
1159 /* We should have received CONNECTED packet */
1160 if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
1161 SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
1165 for (i = 0; i < ctx->sessions_count; i++) {
1166 if (ctx->sessions[i].session == ctx->session) {
1167 ctx->sessions[i].connected = TRUE;
1168 SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
1169 ctx->sessions[i].server_entry->server_name,
1171 SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
1176 /* See if all returned CONNECTED, if not, then continue waiting. */
1177 for (i = 0; i < ctx->sessions_count; i++) {
1178 if (!ctx->sessions[i].connected)
1182 SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
1184 SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
1186 /* The ENDING is sent with timeout, and then we continue to the
1187 END state in the protocol. */
1188 silc_schedule_task_add(server->schedule, 0,
1189 silc_server_backup_send_resumed,
1190 protocol, 1, 0, SILC_TASK_TIMEOUT,
1191 SILC_TASK_PRI_NORMAL);
1196 * Responder (primary router)
1199 /* We should have received ENDING packet */
1200 if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
1201 SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
1205 SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now"));
1207 /* Switch announced informations to our primary router of using the
1209 silc_server_local_servers_toggle_enabled(server, TRUE);
1210 silc_server_update_servers_by_server(server, ctx->sock->user_data,
1212 silc_server_update_clients_by_server(server, ctx->sock->user_data,
1213 server->router, TRUE);
1215 /* We as primary router now must send RESUMED packets to all servers
1216 and routers so that they know we are back. For backup router we
1217 send the packet last so that we give the backup as much time as
1218 possible to deal with message routing at this critical moment. */
1219 for (i = 0; i < server->config->param.connections_max; i++) {
1220 sock = server->sockets[i];
1221 if (!sock || !sock->user_data ||
1222 sock->user_data == server->id_entry ||
1223 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1224 sock->type != SILC_SOCKET_TYPE_SERVER))
1227 /* Send to backup last */
1228 if (sock == ctx->sock)
1232 server_entry = sock->user_data;
1233 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1235 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1236 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1238 /* This connection is performing this protocol too now */
1239 sock->protocol = protocol;
1241 data[0] = SILC_SERVER_BACKUP_RESUMED;
1243 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1244 data, sizeof(data), FALSE);
1245 silc_server_packet_queue_purge(server, sock);
1248 /* Now send the same packet to backup */
1249 if (sock != ctx->sock) {
1252 goto send_to_backup;
1255 /* We are now resumed and are back as primary router in the cell. */
1256 SILC_LOG_INFO(("We are now the primary router of our cell again"));
1257 server->wait_backup = FALSE;
1259 /* For us this is the end of this protocol. */
1260 if (protocol->final_callback)
1261 silc_protocol_execute_final(protocol, server->schedule);
1263 silc_protocol_free(protocol);
1267 case SILC_PROTOCOL_STATE_END:
1270 * Responder (backup router, servers, and remote router)
1272 SilcServerEntry router, backup_router;
1274 /* We should have received RESUMED from our primary router. */
1275 if (ctx->type != SILC_SERVER_BACKUP_RESUMED) {
1276 SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
1280 SILC_LOG_INFO(("Received RESUMED from new primary router"));
1282 /* If we are the backup router, mark that we are no longer primary
1283 but are back to backup router status. */
1284 if (server->backup_router)
1285 server->server_type = SILC_BACKUP_ROUTER;
1287 /* We have now new primary router. All traffic goes there from now on. */
1288 router = ctx->sock->user_data;
1289 if (silc_server_backup_replaced_get(server, router->id,
1292 if (backup_router == server->router) {
1293 /* We have new primary router now */
1294 server->id_entry->router = router;
1295 server->router = router;
1296 SILC_LOG_INFO(("Switching back to primary router %s",
1297 server->router->server_name));
1299 /* We are connected to new primary and now continue using it */
1300 SILC_LOG_INFO(("Resuming the use of primary router %s",
1301 router->server_name));
1303 server->backup_primary = FALSE;
1304 sock = router->connection;
1306 /* Update the client entries of the backup router to the new
1308 silc_server_local_servers_toggle_enabled(server, FALSE);
1309 router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1310 silc_server_update_servers_by_server(server, backup_router, router);
1311 silc_server_update_clients_by_server(
1312 server, NULL, router,
1313 server->server_type == SILC_BACKUP_ROUTER);
1314 if (server->server_type == SILC_SERVER)
1315 silc_server_update_channels_by_server(server, backup_router, router);
1316 silc_server_backup_replaced_del(server, backup_router);
1319 /* Notify local operators that the primary router is in use again */
1320 SILC_SERVER_SEND_OPERS(server, FALSE, TRUE,
1321 SILC_NOTIFY_TYPE_NONE,
1322 ("%s resumed the use of primary router %s",
1323 server->server_name,
1324 server->router->server_name));
1326 /* Protocol has ended, call the final callback */
1327 if (protocol->final_callback)
1328 silc_protocol_execute_final(protocol, server->schedule);
1330 silc_protocol_free(protocol);
1334 case SILC_PROTOCOL_STATE_ERROR:
1335 /* Protocol has ended, call the final callback */
1336 if (protocol->final_callback)
1337 silc_protocol_execute_final(protocol, server->schedule);
1339 silc_protocol_free(protocol);
1342 case SILC_PROTOCOL_STATE_FAILURE:
1343 /* Protocol has ended, call the final callback */
1344 SILC_LOG_ERROR(("Error during backup resume: received Failure"));
1345 ctx->received_failure = TRUE;
1346 if (protocol->final_callback)
1347 silc_protocol_execute_final(protocol, server->schedule);
1349 silc_protocol_free(protocol);
1352 case SILC_PROTOCOL_STATE_UNKNOWN:
1357 /* Final resuming protocol completion callback */
1359 SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
1361 SilcProtocol protocol = (SilcProtocol)context;
1362 SilcServerBackupProtocolContext ctx = protocol->context;
1363 SilcServer server = ctx->server;
1364 SilcServerEntry server_entry;
1365 SilcSocketConnection sock;
1369 silc_schedule_task_del_by_context(server->schedule, protocol);
1371 error = (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
1372 protocol->state == SILC_PROTOCOL_STATE_FAILURE);
1375 SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
1376 if (server->server_type == SILC_SERVER)
1377 silc_schedule_task_del_by_callback(server->schedule,
1378 silc_server_backup_connect_to_router);
1381 if (server->server_shutdown)
1384 /* Remove this protocol from all server entries that have it */
1385 for (i = 0; i < server->config->param.connections_max; i++) {
1386 sock = server->sockets[i];
1387 if (!sock || !sock->user_data ||
1388 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1389 sock->type != SILC_SOCKET_TYPE_SERVER))
1392 server_entry = sock->user_data;
1394 /* The SilcProtocol context was shared between all connections, clear
1395 it from all connections. */
1396 if (sock->protocol == protocol) {
1397 silc_server_packet_queue_purge(server, sock);
1398 sock->protocol = NULL;
1402 if (server->server_type == SILC_SERVER &&
1403 server_entry->server_type == SILC_ROUTER)
1407 if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
1408 if (ctx->sock == sock) {
1409 silc_socket_free(sock); /* unref */
1413 if (!ctx->received_failure) {
1414 /* Protocol error, probably timeout. Just restart the protocol. */
1415 SilcServerBackupProtocolContext proto_ctx;
1417 /* Restart the protocol. */
1418 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
1419 proto_ctx->server = server;
1420 proto_ctx->sock = silc_socket_dup(sock);
1421 proto_ctx->responder = FALSE;
1422 proto_ctx->type = SILC_SERVER_BACKUP_START;
1423 proto_ctx->start = time(0);
1425 /* Start through scheduler */
1426 silc_schedule_task_add(server->schedule, 0,
1427 silc_server_backup_connected_later,
1430 SILC_TASK_PRI_NORMAL);
1432 /* If failure was received, switch back to normal backup router.
1433 For some reason primary wouldn't accept that we were supposed
1434 to perform resuming protocol. */
1435 server->server_type = SILC_BACKUP_ROUTER;
1436 silc_server_local_servers_toggle_enabled(server, FALSE);
1437 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1438 silc_server_update_servers_by_server(server, server->id_entry,
1440 silc_server_update_clients_by_server(server, NULL,
1441 sock->user_data, TRUE);
1443 /* Announce our clients and channels to the router */
1444 silc_server_announce_clients(server, 0, sock);
1445 silc_server_announce_channels(server, 0, sock);
1452 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1457 SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
1459 if (ctx->type == SILC_SERVER_BACKUP_RESUMED && server->router) {
1460 /* Announce all of our information to the router. */
1461 if (server->server_type == SILC_ROUTER)
1462 silc_server_announce_servers(server, FALSE, 0,
1463 server->router->connection);
1465 /* Announce our clients and channels to the router */
1466 silc_server_announce_clients(server, 0, server->router->connection);
1467 silc_server_announce_channels(server, 0, server->router->connection);
1472 if (server->server_type == SILC_SERVER) {
1473 /* If we are still using backup router, send confirmation to backup
1474 that using it is still ok and continue sending traffic there.
1475 The backup will reply with error if it's not ok. */
1476 if (server->router && server->backup_primary) {
1477 /* Send START_USE just in case using backup wouldn't be ok. */
1478 silc_server_backup_send_start_use(server, server->router->connection,
1481 /* Check the same START_USE a couple of times just in case. */
1482 silc_schedule_task_add(server->schedule, 0,
1483 silc_server_backup_check_status,
1484 silc_socket_dup(server->router->connection),
1485 5, 1, SILC_TASK_TIMEOUT,
1486 SILC_TASK_PRI_NORMAL);
1487 silc_schedule_task_add(server->schedule, 0,
1488 silc_server_backup_check_status,
1489 silc_socket_dup(server->router->connection),
1490 20, 1, SILC_TASK_TIMEOUT,
1491 SILC_TASK_PRI_NORMAL);
1492 silc_schedule_task_add(server->schedule, 0,
1493 silc_server_backup_check_status,
1494 silc_socket_dup(server->router->connection),
1495 60, 1, SILC_TASK_TIMEOUT,
1496 SILC_TASK_PRI_NORMAL);
1501 if (ctx->sock && ctx->sock->protocol)
1502 ctx->sock->protocol = NULL;
1504 silc_socket_free(ctx->sock); /* unref */
1505 silc_protocol_free(protocol);
1506 silc_free(ctx->sessions);