5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2001 - 2007 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
21 #include "serverincludes.h"
22 #include "server_internal.h"
24 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);
25 SILC_TASK_CALLBACK(silc_server_backup_announce_watches);
27 static void silc_server_backup_connect_primary(SilcServer server,
28 SilcServerEntry server_entry,
32 /************************** Types and Definitions ***************************/
36 SilcServerEntry server;
40 } SilcServerBackupEntry;
42 /* Holds IP address and port of the primary router that was replaced
47 SilcServerEntry server; /* Backup router that replaced the primary */
48 } SilcServerBackupReplaced;
51 struct SilcServerBackupStruct {
52 SilcServerBackupEntry *servers;
53 SilcUInt32 servers_count;
54 SilcServerBackupReplaced **replaced;
55 SilcUInt32 replaced_count;
61 SilcServerEntry server_entry;
62 } SilcServerBackupProtocolSession;
64 /* Backup resuming protocol context */
67 SilcPacketStream sock;
70 SilcServerBackupProtocolSession *sessions;
71 SilcUInt32 sessions_count;
72 SilcUInt32 initiator_restart;
75 unsigned int responder : 1;
76 unsigned int received_failure : 1;
77 unsigned int timeout : 1;
78 unsigned int error : 1;
79 } *SilcServerBackupProtocolContext;
82 /********************* Backup Configuration Routines ************************/
84 /* Adds the `backup_server' as one of our backup routers. This can be
85 called multiple times to set multiple backup routers. The `ip' and `port'
86 are the IP address and port that the `backup_server' will replace if the
87 `ip' becomes unresponsive. If `local' is TRUE then the `backup_server' is
88 in the local cell, if FALSE it is in some other cell. */
90 void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
91 const char *ip, int port, SilcBool local)
98 if (!server->backup) {
99 server->backup = silc_calloc(1, sizeof(*server->backup));
104 /* See if already added */
105 for (i = 0; i < server->backup->servers_count; i++) {
106 if (server->backup->servers[i].server == backup_server)
110 SILC_LOG_DEBUG(("Backup router %s will replace %s",
111 backup_server->data.sconn ?
112 backup_server->data.sconn->remote_host : "(me)", ip));
114 for (i = 0; i < server->backup->servers_count; i++) {
115 if (!server->backup->servers[i].server) {
116 server->backup->servers[i].server = backup_server;
117 server->backup->servers[i].local = local;
118 server->backup->servers[i].port = SILC_SWAB_16(port);
119 memset(server->backup->servers[i].ip.data, 0,
120 sizeof(server->backup->servers[i].ip.data));
121 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
122 sizeof(server->backup->servers[i].ip.data));
127 i = server->backup->servers_count;
128 server->backup->servers = silc_realloc(server->backup->servers,
129 sizeof(*server->backup->servers) *
131 server->backup->servers[i].server = backup_server;
132 server->backup->servers[i].local = local;
133 server->backup->servers[i].port = SILC_SWAB_16(port);
134 memset(server->backup->servers[i].ip.data, 0,
135 sizeof(server->backup->servers[i].ip.data));
136 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
137 sizeof(server->backup->servers[i].ip.data));
138 server->backup->servers_count++;
141 /* Returns the backup router for the IP and port in `server_id', or NULL
142 if no such backup router exists. */
144 SilcServerEntry silc_server_backup_get(SilcServer server,
145 SilcServerID *server_id)
152 for (i = 0; i < server->backup->servers_count; i++) {
153 if (server->backup->servers[i].server &&
154 server->backup->servers[i].port == server_id->port &&
155 !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
156 sizeof(server_id->ip.data))) {
157 SILC_LOG_DEBUG(("Found backup router %s for %s",
158 server->backup->servers[i].server->server_name,
159 silc_id_render(server_id, SILC_ID_SERVER)));
160 return server->backup->servers[i].server;
167 /* Deletes the backup server `server_entry'. */
169 void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
176 for (i = 0; i < server->backup->servers_count; i++) {
177 if (server->backup->servers[i].server == server_entry) {
178 SILC_LOG_DEBUG(("Removing %s as backup router",
179 silc_id_render(server->backup->servers[i].server->id,
181 server->backup->servers[i].server = NULL;
182 memset(server->backup->servers[i].ip.data, 0,
183 sizeof(server->backup->servers[i].ip.data));
188 /* Frees all data allocated for backup routers. Call this after deleting
189 all backup routers and when no more routers will be added, for example
190 when shutting down the server. */
192 void silc_server_backup_free(SilcServer server)
199 /* Delete existing servers if caller didn't do it */
200 for (i = 0; i < server->backup->servers_count; i++) {
201 if (server->backup->servers[i].server)
202 silc_server_backup_del(server, server->backup->servers[i].server);
205 silc_free(server->backup->servers);
206 silc_free(server->backup);
207 server->backup = NULL;
210 /* Marks the IP address and port from the `server_id' as being replaced
211 by the backup router indicated by `server_entry'. If the router connects
212 at a later time we can check whether it has been replaced by a backup
215 void silc_server_backup_replaced_add(SilcServer server,
216 SilcServerID *server_id,
217 SilcServerEntry server_entry)
220 SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));;
223 server->backup = silc_calloc(1, sizeof(*server->backup));
224 if (!server->backup->replaced) {
225 server->backup->replaced =
226 silc_calloc(1, sizeof(*server->backup->replaced));
227 server->backup->replaced_count = 1;
230 SILC_LOG_DEBUG(("Replacing router %s with %s",
231 silc_id_render(server_id, SILC_ID_SERVER),
232 server_entry->server_name));
234 memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip));
235 r->server = server_entry;
237 for (i = 0; i < server->backup->replaced_count; i++) {
238 if (!server->backup->replaced[i]) {
239 server->backup->replaced[i] = r;
244 i = server->backup->replaced_count;
245 server->backup->replaced = silc_realloc(server->backup->replaced,
246 sizeof(*server->backup->replaced) *
248 server->backup->replaced[i] = r;
249 server->backup->replaced_count++;
252 /* Checks whether the IP address and port from the `server_id' has been
253 replaced by a backup router. If it has, this returns TRUE and stores
254 the backup router entry in `server_entry' if it is non-NULL. Returns
255 FALSE if the router is not replaced by a backup router. */
257 SilcBool silc_server_backup_replaced_get(SilcServer server,
258 SilcServerID *server_id,
259 SilcServerEntry *server_entry)
263 if (!server->backup || !server->backup->replaced)
266 for (i = 0; i < server->backup->replaced_count; i++) {
267 if (!server->backup->replaced[i])
269 if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
270 sizeof(server_id->ip.data))) {
272 *server_entry = server->backup->replaced[i]->server;
273 SILC_LOG_DEBUG(("Router %s is replaced by %s",
274 silc_id_render(server_id, SILC_ID_SERVER),
275 server->backup->replaced[i]->server->server_name));
280 SILC_LOG_DEBUG(("Router %s is not replaced by backup router",
281 silc_id_render(server_id, SILC_ID_SERVER)));
285 /* Deletes the replaced host that was set for `server_entry'. */
287 void silc_server_backup_replaced_del(SilcServer server,
288 SilcServerEntry server_entry)
292 if (!server->backup || !server->backup->replaced)
295 for (i = 0; i < server->backup->replaced_count; i++) {
296 if (!server->backup->replaced[i])
298 if (server->backup->replaced[i]->server == server_entry) {
299 silc_free(server->backup->replaced[i]);
300 server->backup->replaced[i] = NULL;
305 /* Broadcast the received packet indicated by `packet' to all of our backup
306 routers. All router wide information is passed using broadcast packets.
307 That is why all backup routers need to get this data too. It is expected
308 that the caller already knows that the `packet' is a broadcast packet. */
310 void silc_server_backup_broadcast(SilcServer server,
311 SilcPacketStream sender,
314 SilcServerEntry backup;
315 SilcPacketStream sock;
318 if (!server->backup || server->server_type != SILC_ROUTER)
321 SILC_LOG_DEBUG(("Broadcasting received packet to backup routers"));
323 for (i = 0; i < server->backup->servers_count; i++) {
324 backup = server->backup->servers[i].server;
326 if (!backup || backup->connection == sender ||
327 server->backup->servers[i].local == FALSE)
329 if (server->backup->servers[i].server == server->id_entry)
332 sock = backup->connection;
333 silc_server_packet_route(server, sock, packet);
337 /* A generic routine to send data to all backup routers. If the `sender'
338 is provided it will indicate the original sender of the packet and the
339 packet won't be resent to that entity. The `data' is the data that will
340 be assembled to packet context before sending. The packet will be
341 encrypted by this function. If the `force_send' is TRUE the data is sent
342 immediately and not put to queue. If `local' is TRUE then the packet
343 will be sent only to local backup routers inside the cell. If FALSE the
344 packet can go from one cell to the other. This function has no effect
345 if there are no backup routers. */
347 void silc_server_backup_send(SilcServer server,
348 SilcServerEntry sender,
350 SilcPacketFlags flags,
356 SilcServerEntry backup;
357 SilcPacketStream sock;
360 if (!server->backup || server->server_type != SILC_ROUTER)
363 for (i = 0; i < server->backup->servers_count; i++) {
364 backup = server->backup->servers[i].server;
365 if (!backup || sender == backup)
367 if (local && server->backup->servers[i].local == FALSE)
369 if (server->backup->servers[i].server == server->id_entry)
372 sock = backup->connection;
374 silc_server_packet_send(server, backup->connection, type, flags,
379 /* Same as silc_server_backup_send but sets a specific Destination ID to
380 the packet. The Destination ID is indicated by the `dst_id' and the
381 ID type `dst_id_type'. For example, packets destined to channels must
382 be sent using this function. */
384 void silc_server_backup_send_dest(SilcServer server,
385 SilcServerEntry sender,
387 SilcPacketFlags flags,
389 SilcIdType dst_id_type,
395 SilcServerEntry backup;
396 SilcPacketStream sock;
399 if (!server->backup || server->server_type != SILC_ROUTER)
402 for (i = 0; i < server->backup->servers_count; i++) {
403 backup = server->backup->servers[i].server;
404 if (!backup || sender == backup)
406 if (local && server->backup->servers[i].local == FALSE)
408 if (server->backup->servers[i].server == server->id_entry)
411 sock = backup->connection;
413 silc_server_packet_send_dest(server, backup->connection, type, flags,
414 dst_id, dst_id_type, data, data_len);
418 /* Send the START_USE indication to remote connection. If `failure' is
419 TRUE then this sends SILC_PACKET_FAILURE. Otherwise it sends
420 SILC_PACKET_RESUME_ROUTER. */
422 void silc_server_backup_send_start_use(SilcServer server,
423 SilcPacketStream sock,
426 unsigned char data[4];
428 SILC_LOG_DEBUG(("Sending START_USE (%s)",
429 failure ? "failure" : "success"));
432 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START_USE, data);
433 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
436 data[0] = SILC_SERVER_BACKUP_START_USE;
438 silc_server_packet_send(server, sock,
439 SILC_PACKET_RESUME_ROUTER, 0,
444 /* Send the REPLACED indication to remote router. This is sent by the
445 primary router (remote router) of the primary router that came back
446 online. This is not sent by backup router or any other server. */
448 void silc_server_backup_send_replaced(SilcServer server,
449 SilcPacketStream sock)
451 unsigned char data[4];
453 SILC_LOG_DEBUG(("Sending REPLACED"));
455 data[0] = SILC_SERVER_BACKUP_REPLACED;
457 silc_server_packet_send(server, sock,
458 SILC_PACKET_RESUME_ROUTER, 0,
463 /************************ Backup Resuming Protocol **************************/
465 /* Timeout callback for protocol */
467 SILC_TASK_CALLBACK(silc_server_backup_timeout)
469 SilcServerBackupProtocolContext ctx = context;
470 SilcServer server = app_context;
472 SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
475 silc_schedule_task_add_timeout(server->schedule,
476 silc_server_protocol_backup_done, context,
480 /* Callback to start the protocol as responder */
482 SILC_TASK_CALLBACK(silc_server_backup_responder_start)
484 SilcServerBackupProtocolContext proto_ctx = context;
485 SilcPacketStream sock = proto_ctx->sock;
486 SilcIDListData idata = silc_packet_get_context(sock);
487 SilcServer server = app_context;
489 /* If other protocol is executing at the same time, start with timeout. */
490 if (idata->sconn->op) {
491 SILC_LOG_DEBUG(("Other protocol is executing, wait for it to finish"));
492 silc_schedule_task_add_timeout(server->schedule,
493 silc_server_backup_responder_start,
498 /* Register protocol timeout */
499 silc_schedule_task_add_timeout(server->schedule,
500 silc_server_backup_timeout,
503 /* Run the backup resuming protocol */
504 silc_schedule_task_add_timeout(server->schedule,
505 silc_server_protocol_backup,
509 /* Callback to send START_USE to backup to check whether using backup
512 SILC_TASK_CALLBACK(silc_server_backup_check_status)
514 SilcPacketStream sock = context;
515 SilcServer server = app_context;
517 /* Check whether we are still using backup */
518 if (!server->backup_primary)
521 silc_server_backup_send_start_use(server, sock, FALSE);
522 silc_packet_stream_unref(sock);
527 SilcPacketStream sock;
529 } *SilcServerBackupPing;
531 /* PING command reply callback */
533 void silc_server_backup_ping_reply(void *context, void *reply)
535 SilcServerBackupPing pc = context;
536 SilcServerCommandReplyContext cmdr = reply;
538 if (cmdr && !silc_command_get_status(cmdr->payload, NULL, NULL)) {
539 /* Timeout error occurred, the primary is really down. */
540 SilcPacketStream primary = SILC_PRIMARY_ROUTE(pc->server);
542 SILC_LOG_DEBUG(("PING timeout, primary is down"));
545 silc_server_free_sock_user_data(pc->server, primary, NULL);
546 silc_server_close_connection(pc->server, primary);
549 /* Reprocess the RESUME_ROUTER packet */
550 silc_server_backup_resume_router(pc->server, pc->sock, pc->packet);
552 /* The primary is not down, refuse to serve the server as primary */
553 SILC_LOG_DEBUG(("PING received, primary is up"));
554 silc_server_backup_send_start_use(pc->server, pc->sock, TRUE);
555 silc_packet_free(pc->packet);
558 silc_packet_stream_unref(pc->sock);
562 /* Processes incoming RESUME_ROUTER packet. This can give the packet
563 for processing to the protocol handler or allocate new protocol if
564 start command is received. */
566 void silc_server_backup_resume_router(SilcServer server,
567 SilcPacketStream sock,
570 SilcIDListData idata = silc_packet_get_context(sock);
571 SilcServerEntry router = (SilcServerEntry)idata;
572 SilcUInt8 type, session;
573 SilcServerBackupProtocolContext ctx;
576 SILC_LOG_DEBUG(("Received RESUME_ROUTER packet"));
578 if (idata->conn_type == SILC_CONN_CLIENT ||
579 idata->conn_type == SILC_CONN_UNKNOWN) {
580 SILC_LOG_DEBUG(("Bad packet received"));
581 silc_packet_free(packet);
585 ret = silc_buffer_unformat(&packet->buffer,
586 SILC_STR_UI_CHAR(&type),
587 SILC_STR_UI_CHAR(&session),
590 SILC_LOG_ERROR(("Malformed resume router packet received"));
591 silc_packet_free(packet);
595 /* Check whether this packet is used to tell us that server will start
596 using us as primary router. */
597 if (type == SILC_SERVER_BACKUP_START_USE) {
599 SilcServerBackupPing pc;
601 /* If we are normal server then backup router has sent us back
602 this reply and we use the backup as primary router now. */
603 if (server->server_type == SILC_SERVER) {
604 /* Nothing to do here actually, since we have switched already. */
605 SILC_LOG_DEBUG(("Received successful START_USE from backup router"));
606 silc_packet_free(packet);
610 /* Backup router following. */
612 /* If we are marked as router then the primary is down and we send
613 success START_USE back to the server. */
614 if (server->server_type == SILC_ROUTER) {
615 SILC_LOG_DEBUG(("Sending success START_USE back"));
616 silc_server_backup_send_start_use(server, sock, FALSE);
617 silc_packet_free(packet);
621 /* We have just lost primary, send success START_USE back */
622 if (server->standalone) {
623 SILC_LOG_DEBUG(("We are stanalone, sending success START_USE back"));
624 silc_server_backup_send_start_use(server, sock, FALSE);
625 silc_packet_free(packet);
629 /* We are backup router. This server claims that our primary is down.
630 We will check this ourselves by sending PING command to the primary. */
631 SILC_LOG_DEBUG(("Sending PING to detect status of primary router"));
632 idp = silc_id_payload_encode(server->router->id, SILC_ID_SERVER);
633 silc_server_send_command(server, SILC_PRIMARY_ROUTE(server),
634 SILC_COMMAND_PING, ++server->cmd_ident, 1,
635 1, idp->data, silc_buffer_len(idp));
636 silc_buffer_free(idp);
638 /* Reprocess this packet after received reply from router */
639 pc = silc_calloc(1, sizeof(*pc));
643 silc_packet_stream_ref(sock);
644 silc_server_command_pending_timed(server, SILC_COMMAND_PING,
646 silc_server_backup_ping_reply, pc, 15);
650 /* Start the resuming protocol if requested. */
651 if (type == SILC_SERVER_BACKUP_START) {
652 /* We have received a start for resuming protocol. We are either
653 primary router that came back online or normal server. */
654 SilcServerBackupProtocolContext proto_ctx;
656 /* If backup had closed the connection earlier we won't allow resuming
657 since we (primary router) have never gone away. */
658 if (server->server_type == SILC_ROUTER && !server->backup_router &&
659 server->backup_closed) {
660 unsigned char data[4];
661 SILC_LOG_DEBUG(("Backup resuming not allowed since we are still "
663 SILC_LOG_INFO(("Backup resuming not allowed since we are still "
665 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
666 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
668 server->backup_closed = FALSE;
669 silc_packet_free(packet);
673 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
674 proto_ctx->server = server;
675 proto_ctx->sock = sock;
676 proto_ctx->responder = TRUE;
677 proto_ctx->type = type;
678 proto_ctx->session = session;
679 proto_ctx->start = time(0);
680 silc_packet_stream_ref(sock);
681 router->backup = TRUE;
682 router->backup_proto = proto_ctx;
684 SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
685 SILC_LOG_INFO(("Starting backup resuming protocol"));
687 /* Start protocol immediately */
688 silc_schedule_task_add_timeout(server->schedule,
689 silc_server_backup_responder_start,
694 /* If we are router and the packet is coming from our primary router
695 then it means we have been replaced by an backup router in our cell. */
696 if (type == SILC_SERVER_BACKUP_REPLACED &&
697 server->server_type == SILC_ROUTER &&
698 idata->conn_type == SILC_CONN_ROUTER &&
699 SILC_PRIMARY_ROUTE(server) == sock) {
700 /* We have been replaced by an backup router in our cell. We must
701 mark our primary router connection disabled since we are not allowed
702 to use it at this moment. */
703 SILC_LOG_INFO(("We are replaced by an backup router in this cell, will "
704 "wait until backup resuming protocol is executed"));
705 idata->status |= SILC_IDLIST_STATUS_DISABLED;
706 silc_packet_free(packet);
710 /* Activate the shared protocol context for this socket connection
712 if (type == SILC_SERVER_BACKUP_RESUMED &&
713 idata->conn_type == SILC_CONN_ROUTER && !router->backup &&
714 idata->status & SILC_IDLIST_STATUS_DISABLED) {
715 SilcServerEntry backup_router;
717 if (silc_server_backup_replaced_get(server, router->id, &backup_router)) {
718 ctx = backup_router->backup_proto;
720 silc_packet_stream_unref(ctx->sock);
721 router->backup = TRUE;
722 router->backup_proto = ctx;
724 silc_packet_stream_ref(sock);
728 /* Call the resuming protocol if the protocol is active. */
729 if (router->backup) {
730 ctx = router->backup_proto;
733 for (i = 0; i < ctx->sessions_count; i++) {
734 if (session == ctx->sessions[i].session) {
735 ctx->session = session;
736 silc_schedule_task_add_timeout(server->schedule,
737 silc_server_protocol_backup,
739 silc_packet_free(packet);
744 /* If RESUMED received the session ID is zero, execute the protocol. */
745 if (type == SILC_SERVER_BACKUP_RESUMED) {
746 silc_schedule_task_add_timeout(server->schedule,
747 silc_server_protocol_backup,
749 silc_packet_free(packet);
753 SILC_LOG_ERROR(("Unknown backup resuming session %d", session));
754 silc_packet_free(packet);
758 silc_packet_free(packet);
761 /* Task that is called after backup router has connected back to
762 primary router and we are starting the resuming protocol */
764 SILC_TASK_CALLBACK(silc_server_backup_connected_later)
766 SilcServerBackupProtocolContext proto_ctx =
767 (SilcServerBackupProtocolContext)context;
768 SilcServer server = proto_ctx->server;
770 SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
771 SILC_LOG_INFO(("Starting backup resuming protocol"));
773 /* Register protocol timeout */
774 silc_schedule_task_add_timeout(server->schedule,
775 silc_server_backup_timeout,
778 /* Run the backup resuming protocol */
779 silc_schedule_task_add_timeout(server->schedule,
780 silc_server_protocol_backup,
784 SILC_TASK_CALLBACK(silc_server_backup_connected_again)
786 SilcServer server = app_context;
787 SilcServerConfigRouter *primary;
789 SILC_LOG_DEBUG(("Reconnecting"));
791 if (server->server_shutdown)
794 primary = silc_server_config_get_primary_router(server);
796 if (!silc_server_find_socket_by_host(server, SILC_CONN_ROUTER,
797 primary->host, primary->port))
798 silc_server_create_connection(server, TRUE, FALSE,
799 primary->host, primary->port,
800 silc_server_backup_connected,
805 /* Called when we've established connection back to our primary router
806 when we've been acting as backup router and have replaced the primary router
807 in the cell. This function will start the backup resuming protocol. */
809 void silc_server_backup_connected(SilcServer server,
810 SilcServerEntry server_entry,
813 SilcServerBackupProtocolContext proto_ctx;
814 SilcPacketStream sock;
818 SILC_LOG_DEBUG(("Connecting failed"));
819 silc_schedule_task_add_timeout(server->schedule,
820 silc_server_backup_connected_again,
825 sock = server_entry->connection;
826 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
827 proto_ctx->server = server;
828 proto_ctx->sock = sock;
829 proto_ctx->responder = FALSE;
830 proto_ctx->type = SILC_SERVER_BACKUP_START;
831 proto_ctx->start = time(0);
832 silc_packet_stream_ref(sock);
834 /* Start through scheduler */
835 silc_schedule_task_add_timeout(server->schedule,
836 silc_server_backup_connected_later,
840 SILC_TASK_CALLBACK(silc_server_backup_connect_primary_again)
842 SilcServer server = app_context;
843 SilcServerConfigRouter *primary;
845 primary = silc_server_config_get_primary_router(server);
847 if (!silc_server_find_socket_by_host(server, SILC_CONN_ROUTER,
848 primary->host, primary->port))
849 silc_server_create_connection(server, TRUE, FALSE,
850 primary->host, primary->port,
851 silc_server_backup_connect_primary,
856 /* Called when normal server has connected to its primary router after
857 backup router has sent the START packet in resuming protocol. We will
858 move the protocol context from the backup router connection to the
861 static void silc_server_backup_connect_primary(SilcServer server,
862 SilcServerEntry server_entry,
865 SilcPacketStream backup_router = context;
866 SilcIDListData idata = silc_packet_get_context(backup_router);
867 SilcServerEntry router = (SilcServerEntry)idata;
868 SilcServerBackupProtocolContext ctx;
869 SilcPacketStream sock;
870 unsigned char data[2];
874 silc_schedule_task_add_timeout(server->schedule,
875 silc_server_backup_connect_primary_again,
880 if (!router->backup || !server_entry->connection) {
881 silc_packet_stream_unref(backup_router);
885 ctx = router->backup_proto;
886 sock = server_entry->connection;
887 idata = (SilcIDListData)server_entry;
889 SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
890 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
893 /* Send the CONNECTED packet back to the backup router. */
894 data[0] = SILC_SERVER_BACKUP_CONNECTED;
895 data[1] = ctx->session;
896 silc_server_packet_send(server, backup_router,
897 SILC_PACKET_RESUME_ROUTER, 0, data, 2);
899 /* The primary connection is disabled until it sends the RESUMED packet
901 idata->status |= SILC_IDLIST_STATUS_DISABLED;
903 /* Move this protocol context from this backup router connection to
904 the primary router connection since it will send the subsequent
905 packets in this protocol. We don't talk with backup router
908 silc_packet_stream_unref(ctx->sock);
910 silc_packet_stream_ref(sock);
911 server_entry->backup = TRUE;
912 server_entry->backup_proto = ctx;
913 router->backup = FALSE;
914 router->backup_proto = NULL;
917 silc_packet_stream_unref(backup_router);
920 /* Timeout callback used by the backup router to send the ENDING packet
921 to primary router to indicate that it can now resume as being primary
922 router. All CONNECTED packets have been received when we reach this. */
924 SILC_TASK_CALLBACK(silc_server_backup_send_resumed)
926 SilcServerBackupProtocolContext ctx = context;
927 SilcServer server = ctx->server;
928 unsigned char data[2];
931 SILC_LOG_DEBUG(("Start"));
933 for (i = 0; i < ctx->sessions_count; i++)
934 if (ctx->sessions[i].server_entry == silc_packet_get_context(ctx->sock))
935 ctx->session = ctx->sessions[i].session;
937 /* We've received all the CONNECTED packets and now we'll send the
938 ENDING packet to the new primary router. */
939 data[0] = SILC_SERVER_BACKUP_ENDING;
940 data[1] = ctx->session;
941 silc_server_packet_send(server, ctx->sock, SILC_PACKET_RESUME_ROUTER, 0,
944 /* The protocol will go to END state. */
948 /* Backup resuming protocol. This protocol is executed when the primary
949 router wants to resume its position as being primary router. */
951 SILC_TASK_CALLBACK(silc_server_protocol_backup)
953 SilcServerBackupProtocolContext ctx = context;
954 SilcServer server = ctx->server;
955 SilcServerEntry server_entry = NULL;
956 SilcPacketStream sock = NULL;
957 unsigned char data[2];
966 if (ctx->responder == FALSE) {
968 * Initiator (backup router)
971 /* Send the START packet to primary router and normal servers. The
972 packet will indicate to the primary router that it has been replaced
973 by us. For normal servers it means that we will be resigning as
974 being primary router shortly. */
975 list = silc_packet_engine_get_streams(server->packet_engine);
979 silc_dlist_start(list);
980 while ((sock = silc_dlist_get(list))) {
981 server_entry = silc_packet_get_context(sock);
983 if (!server_entry || server_entry == server->id_entry ||
984 (server_entry->data.conn_type != SILC_CONN_ROUTER &&
985 server_entry->data.conn_type != SILC_CONN_SERVER))
988 if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
991 ctx->sessions = silc_realloc(ctx->sessions,
992 sizeof(*ctx->sessions) *
993 (ctx->sessions_count + 1));
994 ctx->sessions[ctx->sessions_count].session = ctx->sessions_count;
995 ctx->sessions[ctx->sessions_count].connected = FALSE;
996 ctx->sessions[ctx->sessions_count].server_entry = server_entry;
998 SILC_LOG_DEBUG(("Sending START to %s (session %d)",
999 server_entry->server_name, ctx->sessions_count));
1000 SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
1001 server_entry->server_name, ctx->sessions_count));
1003 /* This connection is performing this protocol too now */
1004 server_entry->backup = TRUE;
1005 server_entry->backup_proto = ctx;
1007 data[0] = SILC_SERVER_BACKUP_START;
1008 data[1] = ctx->sessions_count;
1009 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1010 data, sizeof(data));
1011 ctx->sessions_count++;
1013 silc_packet_engine_free_streams_list(list);
1015 /* Announce data to the new primary to be. */
1016 silc_server_announce_servers(server, TRUE, 0, ctx->sock);
1017 silc_server_announce_clients(server, 0, ctx->sock);
1018 silc_server_announce_channels(server, 0, ctx->sock);
1024 * Responder (all servers and routers)
1026 SilcServerConfigRouter *primary;
1028 /* We should have received START packet */
1029 if (ctx->type != SILC_SERVER_BACKUP_START) {
1030 SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
1034 /* Connect to the primary router that was down that is now supposed
1035 to be back online. We send the CONNECTED packet after we've
1036 established the connection to the primary router. */
1037 primary = silc_server_config_get_primary_router(server);
1038 if (primary && server->backup_primary &&
1039 !silc_server_num_sockets_by_remote(server,
1040 silc_net_is_ip(primary->host) ?
1041 primary->host : NULL,
1042 silc_net_is_ip(primary->host) ?
1043 NULL : primary->host,
1045 SILC_CONN_ROUTER)) {
1046 SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
1048 silc_packet_stream_ref(ctx->sock);
1049 silc_server_create_connection(server, TRUE, FALSE,
1050 primary->host, primary->port,
1051 silc_server_backup_connect_primary,
1054 /* Nowhere to connect just return the CONNECTED packet */
1055 SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
1057 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1060 /* Send the CONNECTED packet back to the backup router. */
1061 data[0] = SILC_SERVER_BACKUP_CONNECTED;
1062 data[1] = ctx->session;
1063 silc_server_packet_send(server, ctx->sock,
1064 SILC_PACKET_RESUME_ROUTER, 0,
1065 data, sizeof(data));
1068 /* Add this resuming session */
1069 ctx->sessions = silc_realloc(ctx->sessions,
1070 sizeof(*ctx->sessions) *
1071 (ctx->sessions_count + 1));
1072 ctx->sessions[ctx->sessions_count].session = ctx->session;
1073 ctx->sessions_count++;
1075 /* Normal server goes directly to the END state. */
1076 if (server->server_type == SILC_ROUTER &&
1078 server->router->data.status & SILC_IDLIST_STATUS_DISABLED))
1086 if (ctx->responder == FALSE) {
1088 * Initiator (backup router)
1091 /* We should have received CONNECTED packet */
1092 if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
1093 SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
1097 for (i = 0; i < ctx->sessions_count; i++) {
1098 if (ctx->sessions[i].session == ctx->session) {
1099 ctx->sessions[i].connected = TRUE;
1100 SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
1101 ctx->sessions[i].server_entry->server_name,
1103 SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
1108 /* See if all returned CONNECTED, if not, then continue waiting. */
1109 for (i = 0; i < ctx->sessions_count; i++) {
1110 if (!ctx->sessions[i].connected)
1114 SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
1116 SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
1118 /* The ENDING is sent with timeout, and then we continue to the
1119 END state in the protocol. */
1120 silc_schedule_task_add_timeout(server->schedule,
1121 silc_server_backup_send_resumed,
1127 * Responder (primary router)
1130 /* We should have been received ENDING packet */
1131 if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
1132 SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
1136 SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now"));
1138 /* Switch announced informations to our primary router of using the
1140 silc_server_local_servers_toggle_enabled(server, TRUE);
1141 silc_server_update_servers_by_server(server,
1142 silc_packet_get_context(ctx->sock),
1144 silc_server_update_clients_by_server(server,
1145 silc_packet_get_context(ctx->sock),
1146 server->router, TRUE);
1148 /* We as primary router now must send RESUMED packets to all servers
1149 and routers so that they know we are back. For backup router we
1150 send the packet last so that we give the backup as much time as
1151 possible to deal with message routing at this critical moment. */
1152 list = silc_packet_engine_get_streams(server->packet_engine);
1156 silc_dlist_start(list);
1157 while ((sock = silc_dlist_get(list))) {
1158 server_entry = silc_packet_get_context(sock);
1160 if (!server_entry || server_entry == server->id_entry ||
1161 (server_entry->data.conn_type != SILC_CONN_ROUTER &&
1162 server_entry->data.conn_type != SILC_CONN_SERVER))
1165 /* Send to backup last */
1166 if (sock == ctx->sock)
1169 server_entry = silc_packet_get_context(sock);
1170 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1172 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1173 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1175 /* This connection is performing this protocol too now */
1176 server_entry->backup = TRUE;
1177 server_entry->backup_proto = ctx;
1179 data[0] = SILC_SERVER_BACKUP_RESUMED;
1181 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1182 data, sizeof(data));
1185 /* Now send the same packet to backup */
1186 if (sock != ctx->sock) {
1189 server_entry = silc_packet_get_context(sock);
1190 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1192 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1193 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1195 /* This connection is performing this protocol too now */
1196 server_entry->backup = TRUE;
1197 server_entry->backup_proto = ctx;
1199 data[0] = SILC_SERVER_BACKUP_RESUMED;
1201 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1202 data, sizeof(data));
1204 silc_packet_engine_free_streams_list(list);
1206 /* We are now resumed and are back as primary router in the cell. */
1207 SILC_LOG_INFO(("We are now the primary router of our cell again"));
1208 server->wait_backup = FALSE;
1210 /* Announce WATCH list a little later */
1211 silc_packet_stream_ref(ctx->sock);
1212 silc_schedule_task_add_timeout(server->schedule,
1213 silc_server_backup_announce_watches,
1216 /* For us this is the end of this protocol. */
1217 silc_schedule_task_add_timeout(server->schedule,
1218 silc_server_protocol_backup_done,
1226 * Responder (backup router, servers, and remote router)
1228 SilcServerEntry router, backup_router;
1230 /* We should have received RESUMED from our primary router. */
1231 if (ctx->type != SILC_SERVER_BACKUP_RESUMED) {
1232 SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
1236 SILC_LOG_INFO(("Received RESUMED from new primary router"));
1238 /* If we are the backup router, mark that we are no longer primary
1239 but are back to backup router status. */
1240 if (server->backup_router)
1241 server->server_type = SILC_BACKUP_ROUTER;
1243 /* We have now new primary router. All traffic goes there from now on. */
1244 router = silc_packet_get_context(ctx->sock);
1245 if (silc_server_backup_replaced_get(server, router->id,
1248 if (backup_router == server->router) {
1249 /* We have new primary router now */
1250 server->id_entry->router = router;
1251 server->router = router;
1252 SILC_LOG_INFO(("Switching back to primary router %s",
1253 server->router->server_name));
1255 /* We are connected to new primary and now continue using it */
1256 SILC_LOG_INFO(("Resuming the use of primary router %s",
1257 router->server_name));
1259 server->backup_primary = FALSE;
1260 sock = router->connection;
1262 /* Update the client entries of the backup router to the new
1264 silc_server_local_servers_toggle_enabled(server, FALSE);
1265 router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1266 silc_server_update_servers_by_server(server, backup_router, router);
1267 silc_server_update_clients_by_server(
1268 server, NULL, router,
1269 server->server_type == SILC_BACKUP_ROUTER);
1270 if (server->server_type == SILC_SERVER)
1271 silc_server_update_channels_by_server(server, backup_router, router);
1272 silc_server_backup_replaced_del(server, backup_router);
1275 /* Send notify about resuming the use of the primary router to local operators */
1276 SILC_SERVER_SEND_OPERS(server, FALSE, TRUE,
1277 SILC_NOTIFY_TYPE_NONE,
1278 ("%s resumed the use of primary router %s",
1279 server->server_name,
1280 server->router->server_name));
1282 /* Protocol has ended, call the final callback */
1283 silc_schedule_task_add_timeout(server->schedule,
1284 silc_server_protocol_backup_done,
1290 /* Protocol has ended, call the final callback */
1291 silc_schedule_task_add_timeout(server->schedule,
1292 silc_server_protocol_backup_done,
1297 /* Protocol has ended, call the final callback */
1298 SILC_LOG_ERROR(("Error during backup resume: received Failure"));
1299 ctx->received_failure = TRUE;
1300 silc_schedule_task_add_timeout(server->schedule,
1301 silc_server_protocol_backup_done,
1310 /* Final resuming protocol completion callback.  The resume protocol
   states schedule this as a timeout task when the protocol has ended or
   a failure was received; `context' is the protocol context of that run.
   It cancels the tasks still queued for the context, walks all packet
   streams looking for server entries that reference the context, either
   restarts the protocol or falls back to backup router operation for the
   primary route, announces our information to the router when the run
   ended with RESUMED, and finally releases the context's stream
   reference and session table.
   NOTE(review): several short lines of this function (braces, early
   exits, some conditions and trailing call arguments) are not visible in
   this listing; the comments below describe only the visible statements. */
1312 SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
1314 SilcServerBackupProtocolContext ctx = context;
1315 SilcServer server = ctx->server;
1317 SilcServerEntry server_entry;
1318 SilcPacketStream sock;
/* Cancel every task still queued with this protocol context. */
1321 silc_schedule_task_del_by_context(server->schedule, ctx);
/* NOTE(review): the condition guarding this log is not visible here.
   The message text also contains a typo ("protcool"). */
1326 SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
/* NOTE(review): presumably bails out early during shutdown -- confirm. */
1328 if (server->server_shutdown)
1331 /* Remove this protocol from all server entries that have it */
1332 list = silc_packet_engine_get_streams(server->packet_engine);
1336 silc_dlist_start(list);
1337 while ((sock = silc_dlist_get(list))) {
/* The stream's context is treated as the server entry of the remote end. */
1338 server_entry = silc_packet_get_context(sock);
/* Test for connections that are neither router nor server links
   (presumably these are skipped -- confirm). */
1342 if (server_entry->data.conn_type != SILC_CONN_ROUTER &&
1343 server_entry->data.conn_type != SILC_CONN_SERVER)
/* Only entries that were running this very protocol instance. */
1346 if (server_entry->backup_proto == ctx) {
/* Case: we are a normal server and the entry is a router. */
1349 if (server->server_type == SILC_SERVER &&
1350 server_entry->server_type == SILC_ROUTER)
/* Case: this stream is our primary route and we are a backup router. */
1354 if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
/* The context holds this same stream: release a reference to it. */
1355 if (ctx->sock == sock) {
1356 silc_packet_stream_unref(sock);
1360 /* If failed after 10 attempts, it won't work, give up */
1361 if (ctx->initiator_restart > 10)
1362 ctx->received_failure = TRUE;
1364 if (!ctx->received_failure) {
1365 /* Protocol error, probably timeout. Just restart the protocol. */
1366 SilcServerBackupProtocolContext proto_ctx;
1368 /* Restart the protocol with a fresh context as initiator.
   NOTE(review): the silc_calloc() result is dereferenced without a
   NULL check. */
1369 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
1370 proto_ctx->server = server;
1371 proto_ctx->sock = sock;
1372 proto_ctx->responder = FALSE;
1373 proto_ctx->type = SILC_SERVER_BACKUP_START;
1374 proto_ctx->start = time(0);
/* Carry the restart counter forward so the 10 attempt limit applies
   across restarts. */
1375 proto_ctx->initiator_restart = ctx->initiator_restart + 1;
/* The new context stores the stream, so take a reference for it. */
1376 silc_packet_stream_ref(sock);
1378 /* Start through scheduler */
1379 silc_schedule_task_add_timeout(server->schedule,
1380 silc_server_backup_connected_later,
1383 /* If failure was received, switch back to normal backup router.
1384 For some reason primary wouldn't accept that we were supposed
1385 to perform resuming protocol. */
1386 server->server_type = SILC_BACKUP_ROUTER;
1387 silc_server_local_servers_toggle_enabled(server, FALSE);
1388 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1389 silc_server_update_servers_by_server(server, server->id_entry,
1390 silc_packet_get_context(sock));
1391 silc_server_update_clients_by_server(server, NULL,
1392 silc_packet_get_context(sock),
1395 /* Announce our clients and channels to the router */
1396 silc_server_announce_clients(server, 0, sock);
1397 silc_server_announce_channels(server, 0, sock);
1399 /* Announce WATCH list a little later.  The reference taken here is
   released by silc_server_backup_announce_watches. */
1400 silc_packet_stream_ref(sock);
1401 silc_schedule_task_add_timeout(server->schedule,
1402 silc_server_backup_announce_watches,
/* Mark the entry as no longer disabled. */
1410 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
/* Done with the stream list obtained from the packet engine. */
1413 silc_packet_engine_free_streams_list(list);
1416 SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
/* Announcements are made only when the run ended with RESUMED and we
   have a router. */
1418 if (ctx->type == SILC_SERVER_BACKUP_RESUMED && server->router) {
1419 /* Announce all of our information to the router. */
1420 if (server->server_type == SILC_ROUTER)
1421 silc_server_announce_servers(server, FALSE, 0,
1422 server->router->connection);
1424 /* Announce our clients and channels to the router */
1425 silc_server_announce_clients(server, 0, server->router->connection);
1426 silc_server_announce_channels(server, 0, server->router->connection);
1428 /* Announce WATCH list a little later.  The reference taken here is
   released by silc_server_backup_announce_watches. */
1429 silc_packet_stream_ref(server->router->connection);
1430 silc_schedule_task_add_timeout(server->schedule,
1431 silc_server_backup_announce_watches,
1432 server->router->connection, 4, 0);
/* NOTE(review): the branch enclosing the block below is not visible
   here; the START_USE handling suggests it is the error path -- confirm. */
1437 if (server->server_type == SILC_SERVER) {
1438 /* If we are still using the backup router, send confirmation to backup
1439 that using it is still ok and continue sending traffic there.
1440 The backup will reply with error if it's not ok. */
1441 if (server->router && server->backup_primary) {
1442 /* Send START_USE just in case using backup wouldn't be ok. */
1443 silc_server_backup_send_start_use(server, server->router->connection,
1446 /* Check a couple of times with the same START_USE just in case.  Each
   scheduled check gets its own reference on the router stream;
   presumably silc_server_backup_check_status releases it -- confirm. */
1447 silc_packet_stream_ref(server->router->connection);
1448 silc_schedule_task_add_timeout(server->schedule,
1449 silc_server_backup_check_status,
1450 server->router->connection,
1452 silc_packet_stream_ref(server->router->connection);
1453 silc_schedule_task_add_timeout(server->schedule,
1454 silc_server_backup_check_status,
1455 server->router->connection,
1457 silc_packet_stream_ref(server->router->connection);
1458 silc_schedule_task_add_timeout(server->schedule,
1459 silc_server_backup_check_status,
1460 server->router->connection,
/* Detach the finished protocol from the entry behind the context's own
   stream, then release the context's stream reference. */
1467 SilcServerEntry r = silc_packet_get_context(ctx->sock);
1470 r->backup_proto = NULL;
1472 silc_packet_stream_unref(ctx->sock);
/* Release the session table collected during the run. */
1474 silc_free(ctx->sessions);
1478 SILC_TASK_CALLBACK(silc_server_backup_announce_watches)
1480 SilcPacketStream sock = context;
1481 SilcServer server = app_context;
1482 if (silc_packet_stream_is_valid(sock))
1483 silc_server_announce_watches(server, sock);
1484 silc_packet_stream_unref(sock);