文章

FRR BGP源码分析6 -- ZEBRA初始化

FRR BGP源码分析6 -- ZEBRA初始化

zebra,翻译是斑马,它负责管理其他所有协议进程的路由信息的更新与交互,并负责与内核交换信息,整体的架构如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
+------+  +------+  +------+  +------+  +------+  +------+  +------+
| bgpd |  | ripd |  | ospfd|  | ldpd |  | pbrd |  | pimd |  | ...  |
+------+  +------+  +------+  +------+  +------+  +------+  +------+
    |         |         |         |         |         |         |
+---v---------v---------v---------v---------v---------v---------v--+
|                                                                  |
|                              Zebra                               |
|                                                                  |
+------------------------------------------------------------------+
       |                         |                         |
       |                         |                         |
+------v-------+        +--------v---------+        +------v-------+
|              |        |                  |        |              |
| Linux Kernel |        | Remote dataplane |        |    ......    |
|              |        |                  |        |              |
+--------------+        +------------------+        +--------------+

Zebra的初始化在zebra/main.c里面,查看main函数即可

frr_init

frr_init创建zebra主进程的master数据结构,用来做事件驱动,我们可以看下event_loop的数据结构。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Master of the threads.
 *
 * Per-loop state for the event machinery: I/O event slots, a timer list,
 * several event lists, and cancellation / polling / locking bookkeeping.
 * The exact semantics of each field are defined by the event library and
 * are not visible in this excerpt; the notes below are reviewer hints.
 */
struct event_loop {
	char *name;

	/* NOTE(review): presumably per-fd slots for pending I/O events -- confirm */
	struct event **read;
	struct event **write;
	struct event_timer_list_head timer;
	struct event_list_head event, ready, unuse;
	/* NOTE(review): cancel_req/canceled/cancel_cond look like a
	 * cross-thread cancellation handshake -- confirm against the library
	 */
	struct list *cancel_req;
	bool canceled;
	pthread_cond_t cancel_cond;
	struct hash *cpu_record;
	/* NOTE(review): presumably a self-pipe used to wake the poll loop -- confirm */
	int io_pipe[2];
	int fd_limit;
	struct fd_handler handler;
	unsigned long alloc;
	long selectpoll_timeout;
	bool spin;
	bool handle_signals;
	pthread_mutex_t mtx;
	pthread_t owner;

	bool ready_run_loop;
	RUSAGE_T last_getrusage;
};

其整合了事件的可读、可写、定时器、信号的处理,后面有时间可以来学习下。

  • frr_pthread_init 初始化所有的线程链表

zebra_router_init

初始化策略路由(PBR)、nexthop group 以及 TC(qdisc/class/filter)相关的 HASH 表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/*
 * Initialise the global zrouter state.
 *
 * Resets the scalar defaults, runs the vxlan/mlag/neigh sub-initialisers,
 * creates the PBR, nexthop-group and TC hash tables, records the two
 * caller-supplied flags and finally calls the OS-specific router init.
 *
 * asic_offload
 *    stored in zrouter.asic_offloaded
 *
 * notify_on_ack
 *    stored in zrouter.notify_on_ack
 */
void zebra_router_init(bool asic_offload, bool notify_on_ack)
{
	zrouter.sequence_num = 0;

	zrouter.allow_delete = false;

	zrouter.packets_to_process = ZEBRA_ZAPI_PACKETS_TO_PROCESS;

	zrouter.nhg_keep = ZEBRA_DEFAULT_NHG_KEEP_TIMER;

	zebra_vxlan_init();
	zebra_mlag_init();
	zebra_neigh_init();

	/*
	 * PBR hashes. The rules hash is created exactly once: creating it a
	 * second time further down would overwrite this pointer and leak the
	 * first table.
	 */
	zrouter.rules_hash = hash_create_size(8, zebra_pbr_rules_hash_key,
					      zebra_pbr_rules_hash_equal,
					      "Rules Hash");

	zrouter.ipset_hash =
		hash_create_size(8, zebra_pbr_ipset_hash_key,
				 zebra_pbr_ipset_hash_equal, "IPset Hash");

	zrouter.ipset_entry_hash = hash_create_size(
		8, zebra_pbr_ipset_entry_hash_key,
		zebra_pbr_ipset_entry_hash_equal, "IPset Hash Entry");

	zrouter.iptable_hash = hash_create_size(8, zebra_pbr_iptable_hash_key,
						zebra_pbr_iptable_hash_equal,
						"IPtable Hash Entry");

	/* Nexthop group hashes: one keyed by content, one by ID. */
	zrouter.nhgs =
		hash_create_size(8, zebra_nhg_hash_key, zebra_nhg_hash_equal,
				 "Zebra Router Nexthop Groups");
	zrouter.nhgs_id =
		hash_create_size(8, zebra_nhg_id_key, zebra_nhg_hash_id_equal,
				 "Zebra Router Nexthop Groups ID index");

	/* Traffic control hashes. */
	zrouter.qdisc_hash =
		hash_create_size(8, zebra_tc_qdisc_hash_key,
				 zebra_tc_qdisc_hash_equal, "TC (qdisc) Hash");
	zrouter.class_hash = hash_create_size(8, zebra_tc_class_hash_key,
					      zebra_tc_class_hash_equal,
					      "TC (classes) Hash");
	zrouter.filter_hash = hash_create_size(8, zebra_tc_filter_hash_key,
					       zebra_tc_filter_hash_equal,
					       "TC (filter) Hash");

	zrouter.asic_offloaded = asic_offload;
	zrouter.notify_on_ack = notify_on_ack;

	/*
	 * If you start using asic_notification_nexthop_control
	 * come talk to the FRR community about what you are doing
	 * We would like to know.
	 */
#if CONFDATE > 20251231
	CPP_NOTICE(
		"Remove zrouter.asic_notification_nexthop_control as that it's not being maintained or used");
#endif
	zrouter.asic_notification_nexthop_control = false;

#ifdef HAVE_SCRIPTING
	zebra_script_init();
#endif

	/* OS-specific init */
	kernel_router_init();
}

zserv

zebra作为其他协议进程的服务端,通过建立socket和其它的进程建立通道来交互信息。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
 * Open the zserv listening socket and start accepting clients.
 *
 * Resolves 'path' into a socket address, creates a stream socket of that
 * family, binds and listens on it, then schedules the accept handler.
 * A restrictive umask (0077) is in force while the socket is set up and
 * is restored on every exit path, including the error paths.
 *
 * On failure the error is logged and the function returns; if the socket
 * had already been created it is closed and zsock is reset to -1.
 *
 * path
 *    address specification handed to frr_zclient_addr()
 */
void zserv_start(char *path)
{
	int ret;
	mode_t old_mask;
	struct sockaddr_storage sa;
	socklen_t sa_len;

	if (!frr_zclient_addr(&sa, &sa_len, path))
		/* should be caught in zebra main() */
		return;

	/* Set umask */
	old_mask = umask(0077);

	/* Make UNIX domain socket. */
	zsock = socket(sa.ss_family, SOCK_STREAM, 0);
	if (zsock < 0) {
		flog_err_sys(EC_LIB_SOCKET, "Can't create zserv socket: %s",
			     safe_strerror(errno));
		goto restore_umask;
	}

	if (sa.ss_family != AF_UNIX) {
		sockopt_reuseaddr(zsock);
		sockopt_reuseport(zsock);
	} else {
		struct sockaddr_un *suna = (struct sockaddr_un *)&sa;

		/* Remove a stale socket file so bind() can recreate it. */
		if (suna->sun_path[0])
			unlink(suna->sun_path);
	}

	setsockopt_so_recvbuf(zsock, 1048576);
	setsockopt_so_sendbuf(zsock, 1048576);

	/* Privileges are only passed for non-AF_UNIX sockets. */
	frr_with_privs((sa.ss_family != AF_UNIX) ? &zserv_privs : NULL) {
		ret = bind(zsock, (struct sockaddr *)&sa, sa_len);
	}
	if (ret < 0) {
		flog_err_sys(EC_LIB_SOCKET, "Can't bind zserv socket on %s: %s",
			     path, safe_strerror(errno));
		goto close_sock;
	}

	ret = listen(zsock, 5);
	if (ret < 0) {
		flog_err_sys(EC_LIB_SOCKET,
			     "Can't listen to zserv socket %s: %s", path,
			     safe_strerror(errno));
		goto close_sock;
	}

	umask(old_mask);

	zserv_event(NULL, ZSERV_ACCEPT);
	return;

close_sock:
	close(zsock);
	zsock = -1;
restore_umask:
	/* Never leave the process running with the temporary umask. */
	umask(old_mask);
}

/*
 * Schedule a zserv event on the main zebra event loop.
 *
 * client
 *    client the event concerns; not used for ZSERV_ACCEPT
 *
 * event
 *    which event to schedule
 */
void zserv_event(struct zserv *client, enum zserv_event event)
{
	/* No default case on purpose: every enumerator is listed. */
	switch (event) {
	case ZSERV_ACCEPT:
		/* Wait for the listening socket to become readable. */
		event_add_read(zrouter.master, zserv_accept, NULL, zsock,
			       NULL);
		return;

	case ZSERV_PROCESS_MESSAGES:
		event_add_event(zrouter.master, zserv_process_messages,
				client, 0, &client->t_process);
		return;

	case ZSERV_HANDLE_CLIENT_FAIL:
		event_add_event(zrouter.master, zserv_handle_client_fail,
				client, 0, &client->t_cleanup);
		return;
	}
}

zserv_accept 接受客户端的请求,并创建一个新的客户端,还会给每个客户端创建一个线程处理客户端的读、写请求。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
 * Read handler for the zserv listening socket.
 *
 * Re-arms itself, accepts one pending connection, switches the new socket
 * to non-blocking mode and hands it to zserv_client_create(). If accept()
 * fails the error is logged and nothing else is done.
 *
 * thread
 *    the read event; its fd is the listening socket
 */
static void zserv_accept(struct event *thread)
{
	int listen_fd = EVENT_FD(thread);
	int conn_fd;
	/* The peer address is only an accept() output; it is not used below. */
	struct sockaddr_in peer;
	socklen_t peer_len = sizeof(struct sockaddr_in);

	/* Re-register first so the next connection is picked up too. */
	zserv_event(NULL, ZSERV_ACCEPT);

	conn_fd = accept(listen_fd, (struct sockaddr *)&peer, &peer_len);
	if (conn_fd < 0) {
		flog_err_sys(EC_LIB_SOCKET, "Can't accept zebra socket: %s",
			     safe_strerror(errno));
		return;
	}

	/* The client socket must not block. */
	set_nonblocking(conn_fd);

	/* Build the client object around the new socket. */
	zserv_client_create(conn_fd);
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/*
 * Create a new client.
 *
 * This is called when a new connection is accept()'d on the ZAPI socket. It
 * initializes new client structure, notifies any subscribers of the connection
 * event and spawns the client's thread.
 *
 * sock
 *    client's socket file descriptor
 */
static struct zserv *zserv_client_create(int sock)
{
	struct zserv *client;
	size_t stream_size =
		MAX(ZEBRA_MAX_PACKET_SIZ, sizeof(struct zapi_route));
	int i;
	afi_t afi;

	client = XCALLOC(MTYPE_ZSERV_CLIENT, sizeof(struct zserv));

	/* Make client input/output buffer. */
	client->sock = sock;
	client->ibuf_fifo = stream_fifo_new();
	client->obuf_fifo = stream_fifo_new();
	client->ibuf_work = stream_new(stream_size);
	client->obuf_work = stream_new(stream_size);
	client->connect_time = monotime(NULL);
	pthread_mutex_init(&client->ibuf_mtx, NULL);
	pthread_mutex_init(&client->obuf_mtx, NULL);
	pthread_mutex_init(&client->stats_mtx, NULL);
	client->wb = buffer_new(0);
	TAILQ_INIT(&(client->gr_info_queue));

	/* Initialize flags */
	for (afi = AFI_IP; afi < AFI_MAX; afi++) {
		for (i = 0; i < ZEBRA_ROUTE_MAX; i++)
			client->redist[afi][i] = vrf_bitmap_init();
		client->redist_default[afi] = vrf_bitmap_init();
		client->ridinfo[afi] = vrf_bitmap_init();
		client->nhrp_neighinfo[afi] = vrf_bitmap_init();
	}

	/* Add this client to linked list. */
	frr_with_mutex (&client_mutex) {
		listnode_add(zrouter.client_list, client);
	}

	struct frr_pthread_attr zclient_pthr_attrs = {
		.start = frr_pthread_attr_default.start,
		.stop = frr_pthread_attr_default.stop
	};
	client->pthread =
		frr_pthread_new(&zclient_pthr_attrs, "Zebra API client thread",
				"zebra_apic");

	/* start read loop */
	zserv_client_event(client, ZSERV_CLIENT_READ);

	/* call callbacks */
	hook_call(zserv_client_connect, client);

	/* start pthread */
	frr_pthread_run(client->pthread, NULL);

	return client;
}

客户端(比如bgp)会调用zclient_new/zclient_init初始化客户端,连接到zebra服务端,并把自己关心的事件发送到zebra服务端。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Set up bgpd's zclient.
 *
 * Resets the connect counter, registers the interface callbacks, creates
 * and initialises the global zclient for ZEBRA_ROUTE_BGP, then installs
 * the connected callback and records the instance number.
 *
 * master
 *    event loop the zclient is created on
 *
 * instance
 *    stored in zclient->instance
 */
void bgp_zebra_init(struct event_loop *master, unsigned short instance)
{
	zclient_num_connects = 0;

	/* Interface create/up/down/destroy notifications. */
	if_zapi_callbacks(bgp_ifp_create,
			  bgp_ifp_up,
			  bgp_ifp_down,
			  bgp_ifp_destroy);

	/* Create the client with default options and the bgp handler table. */
	zclient = zclient_new(master,
			      &zclient_options_default,
			      bgp_handlers,
			      array_size(bgp_handlers));
	zclient_init(zclient, ZEBRA_ROUTE_BGP, 0, &bgpd_privs);

	/* Filled in after zclient_init(), same as before. */
	zclient->zebra_connected = bgp_zebra_connected;
	zclient->instance = instance;
}

bgp_zebra_connected 是连接服务端成功后,向zebra注册各种事件的回调函数。

rib_init

1
2
3
4
5
6
7
8
9
10
11
12
/*
 * Initialise the routing information base.
 *
 * Runs the route-info check, creates the RIB work queues, then prepares
 * the dataplane result path: mutex, context queue, and registration of
 * rib_dplane_results with the dataplane.
 */
void rib_init(void)
{
	check_route_info();
	rib_queue_init();

	/* Dataplane: lock and queue first, then register for results. */
	pthread_mutex_init(&dplane_mutex, NULL);
	dplane_ctx_q_init(&rib_dplane_q);
	zebra_dplane_init(rib_dplane_results);
}

rib_queue_init初始化work queue相关的内容:ribq用来处理rib相关的信息;meta_queue_new会创建MQ_SIZE(当前定义为11)个subq,每个子队列具有不同的优先级,也就是说rib消息是按优先级队列(PQ)的方式处理的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Initialise the zebra RIB work queue and its meta queue.
 *
 * Creates zrouter.ribq, fills in its spec, then creates zrouter.mq.
 * If either creation fails an error is logged and the function returns.
 */
static void rib_queue_init(void)
{
	zrouter.ribq = work_queue_new(zrouter.master, "route_node processing");
	if (!zrouter.ribq) {
		flog_err(EC_ZEBRA_WQ_NONEXISTENT,
			 "%s: could not initialise work queue!", __func__);
		return;
	}

	/* Work queue spec: worker, no completion callback, retry policy. */
	zrouter.ribq->spec.workfunc = &meta_queue_process;
	zrouter.ribq->spec.completion_func = NULL;
	/* XXX: TODO: These should be runtime configurable via vty */
	zrouter.ribq->spec.max_retries = 3;
	zrouter.ribq->spec.hold = ZEBRA_RIB_PROCESS_HOLD_TIME;
	zrouter.ribq->spec.retry = ZEBRA_RIB_PROCESS_RETRY_TIME;

	zrouter.mq = meta_queue_new();
	if (!zrouter.mq)
		flog_err(EC_ZEBRA_WQ_NONEXISTENT,
			 "%s: could not initialise meta queue!", __func__);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * Allocate a meta queue and give each of its MQ_SIZE sub-queue slots a
 * new list. Returns the new meta queue.
 *
 * A destructor function doesn't seem to be necessary here.
 */
static struct meta_queue *meta_queue_new(void)
{
	struct meta_queue *mq = XCALLOC(MTYPE_WORK_QUEUE, sizeof(*mq));
	unsigned int idx = 0;

	while (idx < MQ_SIZE) {
		mq->subq[idx] = list_new();
		assert(mq->subq[idx]);
		idx++;
	}

	return mq;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/* meta-queue structure:
 * sub-queue 0: nexthop group objects
 * sub-queue 1: EVPN/VxLAN objects
 * sub-queue 2: Early Route Processing
 * sub-queue 3: Early Label Processing
 * sub-queue 4: connected
 * sub-queue 5: kernel
 * sub-queue 6: static
 * sub-queue 7: RIP, RIPng, OSPF, OSPF6, IS-IS, EIGRP, NHRP
 * sub-queue 8: iBGP, eBGP
 * sub-queue 9: any other origin (if any) typically those that
 *              don't generate routes
 *
 * NOTE(review): MQ_SIZE below is 11, so subq[] also has an index 10 that
 * this list does not describe -- confirm its purpose and document it.
 */
#define MQ_SIZE 11
/* Container for the sub-queues described above. */
struct meta_queue {
	struct list *subq[MQ_SIZE];
	uint32_t size; /* sum of lengths of all subqueues */
};

zebra_dplane_init 是初始化data plane数据平面的信息,并会初始化linux kernel数据平面处理的函数,也就是会做一个适配层,隔离FRR和DP面,减少耦合。

zebra_dplane_start会Start the dataplane pthread,处理数据下发到DP 数据平面的消息。(main函数中调用)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/*
 * Start the dataplane pthread. This step needs to be run later than the
 * 'init' step, in case zebra has fork-ed.
 */
void zebra_dplane_start(void)
{
	struct dplane_zns_info *zi;
	struct zebra_dplane_provider *prov;
	struct frr_pthread_attr pattr = {
		.start = frr_pthread_attr_default.start,
		.stop = frr_pthread_attr_default.stop
	};

	/* Start dataplane pthread */

	zdplane_info.dg_pthread = frr_pthread_new(&pattr, "Zebra dplane thread",
						  "zebra_dplane");

	zdplane_info.dg_master = zdplane_info.dg_pthread->master;

	zdplane_info.dg_run = true;

	/* Enqueue an initial event for the dataplane pthread */
	event_add_event(zdplane_info.dg_master, dplane_thread_loop, NULL, 0,
			&zdplane_info.dg_t_update);

	/* Enqueue requests and reads if necessary */
	frr_each (zns_info_list, &zdplane_info.dg_zns_list, zi) {
#if defined(HAVE_NETLINK)
		event_add_read(zdplane_info.dg_master, dplane_incoming_read, zi,
			       zi->info.sock, &zi->t_read);
		dplane_kernel_info_request(zi);
#endif
	}

	/* Call start callbacks for registered providers */

	DPLANE_LOCK();
	prov = dplane_prov_list_first(&zdplane_info.dg_providers);
	DPLANE_UNLOCK();

	while (prov) {

		if (prov->dp_start)
			(prov->dp_start)(prov);

		/* Locate next provider */
		prov = dplane_prov_list_next(&zdplane_info.dg_providers, prov);
	}

	frr_pthread_run(zdplane_info.dg_pthread, NULL);
}

zebra_mpls_init

初始化MPLS相关的信息,主要功能有:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Global MPLS initialization.
 *
 * Starts with MPLS disabled and non-strict PW reachability. If the kernel
 * probe succeeds, zebra_mpls_turned_on() is called; otherwise a warning
 * is logged and the function returns.
 */
void zebra_mpls_init(void)
{
	mpls_enabled = false;
	mpls_pw_reach_strict = false;

	if (mpls_kernel_init() >= 0) {
		zebra_mpls_turned_on();
		return;
	}

	flog_warn(EC_ZEBRA_MPLS_SUPPORT_DISABLED,
		  "Disabling MPLS support (no kernel support)");
}
  1. 内核是否支持MPLS
1
2
3
4
5
6
7
8
9
10
11
12
/*
 * Probe for kernel MPLS support.
 *
 * Returns 0 if stat() on /proc/sys/net/mpls succeeds, -1 otherwise.
 */
int mpls_kernel_init(void)
{
	struct stat st;

	/*
	 * Check if the MPLS module is loaded in the kernel.
	 */
	if (stat("/proc/sys/net/mpls", &st) != 0)
		return -1;

	return 0;
}
  2. Zebra处理MPLS 信息的work queue
1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Initialize work queue for processing changed LSPs.
 *
 * Creates zrouter.lsp_process_q on the main event loop and fills in its
 * spec: worker, item-delete and completion callbacks, no retries, and a
 * hold value of 10.
 */
static void mpls_processq_init(void)
{
	zrouter.lsp_process_q =
		work_queue_new(zrouter.master, "LSP processing");

	/* Callbacks. */
	zrouter.lsp_process_q->spec.workfunc = &lsp_process;
	zrouter.lsp_process_q->spec.completion_func = &lsp_processq_complete;
	zrouter.lsp_process_q->spec.del_item_data = &lsp_processq_del;

	/* Scheduling parameters. */
	zrouter.lsp_process_q->spec.hold = 10;
	zrouter.lsp_process_q->spec.max_retries = 0;
}

最后调用 frr_run,zebra 主线程开始运行,至此初始化全部完成;其它的初始化点,后续再继续补充。


本文参考

  1. BGP官方文档
  2. FRR BGP 协议分析
本文由作者按照 CC BY 4.0 进行授权