欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

【OVS2.5源码解读】datapath的netlink机制

程序员文章站 2024-03-23 13:16:10
...

datapath为 ovs内核模块,负责执行数据交换,也就是把从接收端口收到的数据包在流表中进行匹配,并执行匹配到的动作。

一个datapath可以对应多个vport,vport类似于物理交换机的端口。一个datapath关联一个flow table,一个flow table包含多个条目,每个条目包括两部分内容:一个match/key和一个action。

首先分析为什么要调用upcall函数。看过前面源码分析的读者应该知道,只有当一个数据包在内核流表中查找不到匹配的流表项时,才会调用upcall函数(例如数据包第一次进入内核,内核中尚未为它设置相应的流表规则)。upcall函数的作用就是把数据包的信息发送到用户空间,而内核空间到用户空间的通信要用到Linux中的NetLink机制。因此,熟悉NetLink通信有助于理解upcall函数需要哪些参数,以及整个函数的作用和功能。

通过一个例子来看看 OVS 中数据包是如何进行转发的:
【OVS2.5源码解读】datapath的netlink机制
1)ovs 的 datapath 接收到从 ovs 连接的某个网络端口发来的数据包,从数据包中提取源/目的 IP、源/目的 MAC、端口等信息。

2)ovs 在内核态查看流表结构(通过 hash),如果命中,则快速转发。

3)如果没有命中,内核态不知道如何处置这个数据包,所以,通过 netlink upcall 机制从内核态通知用户态,发送给 ovs-vswitchd 组件处理。

4)ovs-vswitchd 查询用户态精确流表和模糊流表,如果还不命中,在 SDN 控制器接入的情况下,经过 OpenFlow 协议,通告给控制器,由控制器处理。

5)如果模糊命中, ovs-vswitchd 会同时刷新用户态精确流表和内核态精确流表,如果精确命中,则只更新内核态流表。

6)刷新后,重新把该数据包注入给内核态 datapath 模块处理。

7)datapath 重新发起选路,查询内核流表,匹配;报文转发,结束。

NetLink的使用

NetLink由两部分程序构成,一部分在用户空间,另一部分在内核空间。用户空间部分和大多数socket编程一样,只是使用的协议族是AF_NETLINK,其他步骤基本相同。

ovs datapath是通过netlink与用户态进行通信的,实现dp、端口、流表、packet的操作。 netlink的注册是在datapath模块的初始化函数中实现的。

1、dp_init函数

/*
 * Initialization routine of the datapath module.
 *
 * Runs each initialization step in order. When a step fails, control jumps
 * to the error label that undoes every step that already succeeded, in
 * reverse order, and the failing step's error code is returned.
 *
 * Returns 0 when every step succeeds.
 */
static int __init dp_init(void)  
{  
    int err;  

    /* Compile-time check: struct ovs_skb_cb must fit in sk_buff's cb field. */
    BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));  

    pr_info("Open vSwitch switching datapath %s\n", VERSION);  

    err = compat_init();  
    if (err)  
        goto error;  

    err = action_fifos_init();  
    if (err)  
        goto error_compat_exit;  

    err = ovs_internal_dev_rtnl_link_register();  
    if (err)  
        goto error_action_fifos_exit;  

    err = ovs_flow_init();  
    if (err)  
        goto error_unreg_rtnl_link;  

    err = ovs_vport_init();  
    if (err)  
        goto error_flow_exit;  

    err = register_pernet_device(&ovs_net_ops);  
    if (err)  
        goto error_vport_exit;  

    err = register_netdevice_notifier(&ovs_dp_device_notifier);  
    if (err)  
        goto error_netns_exit;  

    err = ovs_netdev_init();  
    if (err)  
        goto error_unreg_notifier;  

    /* Last step: register the netlink handlers. Only a negative return
     * value is treated as failure here. */
    err = dp_register_genl();
    if (err < 0)  
        goto error_unreg_netdev;  

    return 0;  

    /* Unwind ladder: each label undoes one step and falls through to the
     * labels for the steps that ran before it. */
error_unreg_netdev:  
    ovs_netdev_exit();  
error_unreg_notifier:  
    unregister_netdevice_notifier(&ovs_dp_device_notifier);  
error_netns_exit:  
    unregister_pernet_device(&ovs_net_ops);  
error_vport_exit:  
    ovs_vport_exit();  
error_flow_exit:  
    ovs_flow_exit();  
error_unreg_rtnl_link:  
    ovs_internal_dev_rtnl_link_unregister();  
error_action_fifos_exit:  
    action_fifos_exit();  
error_compat_exit:  
    compat_exit();  
error:  
    return err;  
}  

2、genl_register_family函数

/* Redirect genl_register_family() to the replacement wrapper below. */
#define genl_register_family rpl_genl_register_family  
/*
 * Records THIS_MODULE as the family's owning module, then passes the family
 * to rpl___genl_register_family() and returns its result.
 */
static inline int rpl_genl_register_family(struct genl_family *family)  
{  
    family->module = THIS_MODULE;  
    return rpl___genl_register_family(family);        /* register with netlink */
}  

3、rpl___genl_register_family函数

/*
 * Registers a generic netlink family through the embedded compat_family.
 *
 * Copies the descriptive fields of @f into f->compat_family, registers that
 * family together with its ops table and, when @f supplies a multicast
 * group, registers that group as well.
 *
 * Returns 0 on success, otherwise the error code of the registration call
 * that failed.
 */
int rpl___genl_register_family(struct rpl_genl_family *f)  
{  
    int err;  

    f->compat_family.id = f->id;  
    f->compat_family.hdrsize = f->hdrsize;  
    /* strncpy() leaves the destination unterminated when the source has
     * GENL_NAMSIZ or more characters, so copy one byte less and terminate
     * the name explicitly. */
    strncpy(f->compat_family.name, f->name, GENL_NAMSIZ - 1);  
    f->compat_family.name[GENL_NAMSIZ - 1] = '\0';  
    f->compat_family.version = f->version;  
    f->compat_family.maxattr = f->maxattr;  
    f->compat_family.netnsok = f->netnsok;  
#ifdef HAVE_PARALLEL_OPS  
    f->compat_family.parallel_ops = f->parallel_ops;  
#endif  
    /* Kernel interface: the ops table becomes the entry points for the
     * netlink messages of this family, so no message handling is needed
     * here. */
    err = genl_register_family_with_ops(&f->compat_family,  
                        (struct genl_ops *) f->ops, f->n_ops);  
    if (err)  
        goto error;  

    if (f->mcgrps) {  
        /* Need to Fix GROUP_ID() for more than one group. */  
        BUG_ON(f->n_mcgrps > 1);  
        /* Kernel interface. NOTE(review): if this call fails, the family
         * registered above is not unregistered in this function; confirm
         * that the caller cleans it up. */
        err = genl_register_mc_group(&f->compat_family,  
                         (struct genl_multicast_group *) f->mcgrps);  
        if (err)  
            goto error;  
    }  
error:  
    return err;  

}  

netlink的操作是由dp_genl_families定义的,一共有4类。

/* Table of the four generic netlink families: datapath, vport, flow and
 * packet. */
static struct genl_family *dp_genl_families[] = {  
    &dp_datapath_genl_family,  
    &dp_vport_genl_family,  
    &dp_flow_genl_family,  
    &dp_packet_genl_family,  
};  

1、datapath netlink定义:

/* Generic netlink family for datapath commands. Its handlers are listed in
 * dp_datapath_genl_ops and it has a single multicast group. */
static struct genl_family dp_datapath_genl_family = {  
    .id = GENL_ID_GENERATE,  
    .hdrsize = sizeof(struct ovs_header),  
    .name = OVS_DATAPATH_FAMILY,  
    .version = OVS_DATAPATH_VERSION,  
    .maxattr = OVS_DP_ATTR_MAX,  
    .netnsok = true,  
    .parallel_ops = true,  
    .ops = dp_datapath_genl_ops,  
    .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),  
    .mcgrps = &ovs_dp_datapath_multicast_group,  
    .n_mcgrps = 1,  
};  

/* Command table of the datapath family: maps each OVS_DP_CMD_* command to
 * its handler. Only GET is open to unprivileged users. */
static struct genl_ops dp_datapath_genl_ops[] = {  
    { .cmd = OVS_DP_CMD_NEW,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = datapath_policy,  
      .doit = ovs_dp_cmd_new      /* create a datapath */
    },  
    { .cmd = OVS_DP_CMD_DEL,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = datapath_policy,  
      .doit = ovs_dp_cmd_del     /* delete a datapath */ 
    },  
    { .cmd = OVS_DP_CMD_GET,  
      .flags = 0,           /* OK for unprivileged users. */  
      .policy = datapath_policy,  
      .doit = ovs_dp_cmd_get,  
      .dumpit = ovs_dp_cmd_dump   /* dump datapaths */ 
    },  
    { .cmd = OVS_DP_CMD_SET,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = datapath_policy,  
      .doit = ovs_dp_cmd_set,     /* modify a datapath */
    },  
};  

2、vport netlink定义:

/* Generic netlink family for vport commands. Its handlers are listed in
 * dp_vport_genl_ops and it has a single multicast group.
 * NOTE(review): unlike the other families this one is not static,
 * presumably because it is referenced from another source file; confirm. */
struct genl_family dp_vport_genl_family = {  
    .id = GENL_ID_GENERATE,  
    .hdrsize = sizeof(struct ovs_header),  
    .name = OVS_VPORT_FAMILY,  
    .version = OVS_VPORT_VERSION,  
    .maxattr = OVS_VPORT_ATTR_MAX,  
    .netnsok = true,  
    .parallel_ops = true,  
    .ops = dp_vport_genl_ops,  
    .n_ops = ARRAY_SIZE(dp_vport_genl_ops),  
    .mcgrps = &ovs_dp_vport_multicast_group,  
    .n_mcgrps = 1,  
};  

/* Command table of the vport family: maps each OVS_VPORT_CMD_* command to
 * its handler. Only GET is open to unprivileged users. */
static struct genl_ops dp_vport_genl_ops[] = {  
    { .cmd = OVS_VPORT_CMD_NEW,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = vport_policy,  
      .doit = ovs_vport_cmd_new /* create a vport */  
    },  
    { .cmd = OVS_VPORT_CMD_DEL,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = vport_policy,  
      .doit = ovs_vport_cmd_del /* delete a vport */  
    },  
    { .cmd = OVS_VPORT_CMD_GET,  
      .flags = 0,           /* OK for unprivileged users. */  
      .policy = vport_policy,  
      .doit = ovs_vport_cmd_get,  
      .dumpit = ovs_vport_cmd_dump  /* dump vports */ 
    },  
    { .cmd = OVS_VPORT_CMD_SET,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = vport_policy,  
      .doit = ovs_vport_cmd_set,    /* modify a vport */  
    },  
};  

3、精确流表 netlink定义:

/* Generic netlink family for flow commands. Its handlers are listed in
 * dp_flow_genl_ops and it has a single multicast group. */
static struct genl_family dp_flow_genl_family = {  
    .id = GENL_ID_GENERATE,  
    .hdrsize = sizeof(struct ovs_header),  
    .name = OVS_FLOW_FAMILY,  
    .version = OVS_FLOW_VERSION,  
    .maxattr = OVS_FLOW_ATTR_MAX,  
    .netnsok = true,  
    .parallel_ops = true,  
    .ops = dp_flow_genl_ops,  /* entry points for datapath flow table updates */
    .n_ops = ARRAY_SIZE(dp_flow_genl_ops),  
    .mcgrps = &ovs_dp_flow_multicast_group,  
    .n_mcgrps = 1,  
};  

/* Command table of the flow family: maps each OVS_FLOW_CMD_* command to its
 * handler. Only GET is open to unprivileged users. */
static struct genl_ops dp_flow_genl_ops[] = {  
    { .cmd = OVS_FLOW_CMD_NEW,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = flow_policy,  
      .doit = ovs_flow_cmd_new  /* add an exact-match flow */
    },  
    { .cmd = OVS_FLOW_CMD_DEL,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = flow_policy,  
      .doit = ovs_flow_cmd_del  /* delete an exact-match flow */
    },  
    { .cmd = OVS_FLOW_CMD_GET,  
      .flags = 0,           /* OK for unprivileged users. */  
      .policy = flow_policy,  
      .doit = ovs_flow_cmd_get,  
      .dumpit = ovs_flow_cmd_dump   /* dump exact-match flows */  
    },  
    { .cmd = OVS_FLOW_CMD_SET,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = flow_policy,  
      .doit = ovs_flow_cmd_set, /* modify an exact-match flow */  
    },  
};  

4、packet netlink定义:

/* Generic netlink family for packet commands. Its handlers are listed in
 * dp_packet_genl_ops. Unlike the datapath, vport and flow families, no
 * multicast group is set here. */
static struct genl_family dp_packet_genl_family = {  
    .id = GENL_ID_GENERATE,  
    .hdrsize = sizeof(struct ovs_header),  
    .name = OVS_PACKET_FAMILY,  
    .version = OVS_PACKET_VERSION,  
    .maxattr = OVS_PACKET_ATTR_MAX,  
    .netnsok = true,  
    .parallel_ops = true,  
    .ops = dp_packet_genl_ops,  
    .n_ops = ARRAY_SIZE(dp_packet_genl_ops),  
};  

/* Command table of the packet family: a single EXECUTE command. */
static struct genl_ops dp_packet_genl_ops[] = {  
    { .cmd = OVS_PACKET_CMD_EXECUTE,  
      .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */  
      .policy = packet_policy,  
      .doit = ovs_packet_cmd_execute    /* execute actions on a packet */  
    }  
};  

upcall线程与netlink

假设upcall handler线程有两个,vport有四个,那么每个vport下都将持有两个NetLink连接的信息,这两个NetLink连接将被用来上送upcall消息。
每个NetLink连接被对应的upcall handler线程接管,多个vport在同一线程中的NetLink连接被epoll管理。
【OVS2.5源码解读】datapath的netlink机制

1:每个vport下都挂多个NetLink连接,数量等同于upcall处理线程的数量
2:线程中routine函数为udpif_upcall_handler,伪码如下:

routine {
     while(线程未被设置退出标记) {
          if(从epoll中收到了一些upcall消息,则进一步处理upcall) {
               continue; 
          } else {
               注册poll:当epoll_fd上有事件就绪时,解除当前线程的阻塞
          }
          block当前线程
     }
}

3:每个NetLink连接被称为一个upcall channel