欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员、计算机编程方面的知识。
您现在的位置是: 首页

OpenvSwitch key模块详解

程序员文章站 2022-07-07 14:18:16
...
/*
 * Flow key: the set of packet and metadata fields that a flow is matched on.
 * Members are grouped below by protocol layer.
 *
 * NOTE(review): the member order here follows a layer-by-layer presentation.
 * Layout-dependent remarks (e.g. the one on 'phy', or "filling a hole") should
 * be confirmed against the actual datapath header before being relied on.
 */
struct sw_flow_key {
	/* ---- IP tunnel layer ---- */
	u8 tun_proto;			/* Protocol of encapsulating tunnel. */
	struct ip_tunnel_key tun_key;	/* Encapsulating tunnel key. */
	u8 tun_opts[255];		/* Tunnel option bytes. */
	u8 tun_opts_len;		/* Length of the options stored in 'tun_opts'. */

	/* ---- Physical layer ---- */
	struct {
		u32	priority;	/* Packet QoS priority. */
		u32	skb_mark;	/* SKB mark. */
		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
	} __packed phy; /* Safe when right after 'tun_key'. */

	/* ---- Link layer ---- */
	u32 ovs_flow_hash;		/* Datapath computed hash value.  */
	u8 mac_proto;			/* MAC layer protocol (e.g. Ethernet). */
	u32 recirc_id;			/* Recirculation ID.  */
	struct {
		u8     src[ETH_ALEN];	/* Ethernet source address. */
		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
		struct vlan_head vlan;
		struct vlan_head cvlan;
		__be16 type;		/* Ethernet frame type. */
	} eth;

	/* ---- Network layer ---- */
	union {
		struct {
			__be32 top_lse;	/* top label stack entry */
		} mpls;
		struct {
			u8     proto;	/* IP protocol or lower 8 bits of ARP opcode. */
			u8     tos;	/* IP ToS. */
			u8     ttl;	/* IP TTL/hop limit. */
			u8     frag;	/* One of OVS_FRAG_TYPE_*. */
		} ip;
	};
	union {
		struct {
			struct {
				__be32 src;	/* IP source address. */
				__be32 dst;	/* IP destination address. */
			} addr;
			union {
				struct {
					__be32 src;
					__be32 dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
				} arp;
			};
		} ipv4;
		struct {
			struct {
				struct in6_addr src;	/* IPv6 source address. */
				struct in6_addr dst;	/* IPv6 destination address. */
			} addr;
			__be32 label;			/* IPv6 flow label. */
			union {
				struct {
					struct in6_addr src;
					struct in6_addr dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					struct in6_addr target;	/* ND target address. */
					u8 sll[ETH_ALEN];	/* ND source link layer address. */
					u8 tll[ETH_ALEN];	/* ND target link layer address. */
				} nd;
			};
		} ipv6;
		struct ovs_key_nsh nsh;         /* network service header */
	};

	/* ---- Transport layer ---- */
	struct {
		__be16 src;		/* TCP/UDP/SCTP source port. */
		__be16 dst;		/* TCP/UDP/SCTP destination port. */
		__be16 flags;		/* TCP flags. */
	} tp;

	/* ---- Connection tracking (ct) layer ---- */
	/* Filling a hole of two bytes. */
	u8 ct_state;
	u16 ct_zone;			/* Conntrack zone. */
	u8 ct_orig_proto;		/* CT original direction tuple IP protocol. */
	struct {
		/* Connection tracking fields not packed above. */
		struct {
			__be16 src;	/* CT orig tuple tp src port. */
			__be16 dst;	/* CT orig tuple tp dst port. */
		} orig_tp;
		u32 mark;
		struct ovs_key_ct_labels labels;
	} ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */

struct sw_flow_key结构比较复杂,下面用层次化的图形将其表示得更清晰一些。

struct sw_flow_key

                                                                         OpenvSwitch key模块详解

IP隧道层:

                                       OpenvSwitch key模块详解

物理层

                                                                                OpenvSwitch key模块详解

链路层

                                       OpenvSwitch key模块详解

网络层

                                   OpenvSwitch key模块详解

传输层

                                                      OpenvSwitch key模块详解

ct层:

                                          OpenvSwitch key模块详解

sw_flow_key解析流程

OpenvSwitch key模块详解

 

ip 隧道层处理

/*
 * Excerpt of ovs_flow_key_extract(): the part that fills the tunnel-related
 * members of 'key' from 'tun_info'. The rest of the function, including its
 * return statement, is not shown in this excerpt.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
			 struct sk_buff *skb, struct sw_flow_key *key)
{
	if (tun_info) {
        // Store the value returned by ip_tunnel_info_af() as the tunnel
        // protocol (presumably the tunnel's address family - confirm).
		key->tun_proto = ip_tunnel_info_af(tun_info);
        // Copy the tunnel key from the tunnel info into the flow key.
		memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
        // Compile-time check: the largest value 'options_len' can hold must
        // not exceed the size of key->tun_opts.
		BUILD_BUG_ON(((1 << (sizeof(tun_info->options_len) * 8)) - 1) >
			     sizeof(key->tun_opts));
        // Fetch the tunnel options (option data, not a set of functions)
        // into the key's option area and record their length.
		if (tun_info->options_len) {
			ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
						tun_info);
			key->tun_opts_len = tun_info->options_len;
		} else {
			key->tun_opts_len = 0;
		}
	} else  {
        // No IP tunnel in use: zero the tunnel protocol, option length and
        // tunnel key.
		key->tun_proto = 0;
		key->tun_opts_len = 0;
		memset(&key->tun_key, 0, sizeof(key->tun_key));
	}
}

物理层处理

/*
 * Excerpt of ovs_flow_key_extract(): the part that fills the physical-layer
 * members of 'key' from 'skb' and then hands over to key_extract().
 * The declarations of 'res' and 'err' and the final return statement are not
 * shown in this excerpt.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
			 struct sk_buff *skb, struct sw_flow_key *key)
{
    // Packet priority.
	key->phy.priority = skb->priority;
    // Port number of the vport the packet was received on.
	key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
	key->phy.skb_mark = skb->mark;
	key->ovs_flow_hash = 0;
    // Determine the link-layer protocol (key_extract_mac_proto() decides
    // based on skb->dev->type) in preparation for link-layer parsing.
    // A negative result is an error code and is returned as-is.
	res = key_extract_mac_proto(skb); 
	if (res < 0)
		return res;
	key->mac_proto = res;
	key->recirc_id = 0;
    // Parse the link, network and transport layers.
	err = key_extract(skb, key);
}

key_extract_mac_proto

/*
 * Derive the MAC-layer protocol of a packet from the type of the device it
 * is attached to.
 *
 * Returns MAC_PROTO_ETHERNET for ARPHRD_ETHER devices, and for ARPHRD_NONE
 * devices whose skb protocol is ETH_P_TEB; MAC_PROTO_NONE for any other
 * packet on an ARPHRD_NONE device; -EINVAL (after a one-time warning) for
 * every other device type.
 */
static int key_extract_mac_proto(struct sk_buff *skb)
{
	switch (skb->dev->type) {
	case ARPHRD_NONE:
		/* No link-layer header on the device, unless the payload
		 * itself is flagged as ETH_P_TEB.
		 */
		return skb->protocol == htons(ETH_P_TEB) ? MAC_PROTO_ETHERNET
							 : MAC_PROTO_NONE;
	case ARPHRD_ETHER:
		return MAC_PROTO_ETHERNET;
	default:
		/* Unsupported device type. */
		WARN_ON_ONCE(1);
		return -EINVAL;
	}
}

阅读链路层、网络层、传输层的代码分析之前,需要先了解 sk_buff 的基本操作(如 data 指针的移动以及各层协议头偏移的记录)。教程可参考:

https://blog.csdn.net/dog250/article/details/43637053

链路层处理

/*
 * Excerpt of key_extract(): the link-layer part. It records the MAC and
 * network header positions in 'skb' and fills key->eth. The network- and
 * transport-layer parsing and the return statement are not shown in this
 * excerpt.
 */
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
	int error;
	struct ethhdr *eth;
    /* At this point skb->data points exactly at the start of the MAC header,
     * so skb_reset_mac_header() can be called directly to record the MAC
     * header position. This is a precondition that any skb passed to
     * key_extract() must satisfy.
     */
	skb_reset_mac_header(skb);
	clear_vlan(key);
    // No link-layer header present.
	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
		skb_reset_network_header(skb);// Record the network header position right away.
		key->eth.type = skb->protocol;
	} else {
		eth = eth_hdr(skb);// Get the MAC header.
        // Copy the source and destination MAC addresses into the flow key.
		ether_addr_copy(key->eth.src, eth->h_source);
		ether_addr_copy(key->eth.dst, eth->h_dest);
        // Advance skb->data past the two MAC addresses (2 * ETH_ALEN bytes),
        // i.e. to the field that follows them in struct ethhdr.
		__skb_pull(skb, 2 * ETH_ALEN);
        // Parse the EtherType, which selects the network-layer parsing below.
		key->eth.type = parse_ethertype(skb);
        // Record the current skb->data as the start of the network header
        // (presumably parse_ethertype() has advanced skb->data to it - confirm).
		skb_reset_network_header(skb);
        // Restore skb->data so that it points at the start of the MAC header again.
		__skb_push(skb, skb->data - skb_mac_header(skb));
	}
    // Refresh skb->mac_len now that both header positions are recorded
    // (presumably the distance from the MAC header to the network header -
    // see skb_reset_mac_len()).
	skb_reset_mac_len(skb);
}

ethhdr

/* Ethernet header: destination address, source address, then protocol type. */
struct ethhdr {
    unsigned char h_dest[ETH_ALEN];   /* Destination MAC address. */
    unsigned char h_source[ETH_ALEN]; /* Source MAC address. */
    __u16 h_proto;                    /* Protocol type used by the network layer. */
} __attribute__((packed)); /* Tells the compiler not to insert padding into this struct. */

网络层

ipv4

/*
 * Fragment of key_extract(): IPv4 branch. Copies addresses, protocol, ToS and
 * TTL from the IP header into the key and classifies fragmentation.
 * NOTE(review): no handling of 'error' after check_iphdr() is visible in this
 * excerpt, and the branch's closing brace is not shown.
 */
if (key->eth.type == htons(ETH_P_IP)) {
		struct iphdr *nh;
		__be16 offset;
        
		error = check_iphdr(skb);
        
		nh = ip_hdr(skb);
		key->ipv4.addr.src = nh->saddr;
		key->ipv4.addr.dst = nh->daddr;

		key->ip.proto = nh->protocol;
		key->ip.tos = nh->tos;
		key->ip.ttl = nh->ttl;
/* Fragment offset: a non-zero offset marks a later fragment, and parsing
 * stops here.
 */
		offset = nh->frag_off & htons(IP_OFFSET);
		if (offset) {
			key->ip.frag = OVS_FRAG_TYPE_LATER;
			return 0;
		}
/* Offset is zero: with the "more fragments" flag set (or, when
 * HAVE_SKB_GSO_UDP is defined, an SKB_GSO_UDP gso_type) this is treated as a
 * first fragment; otherwise the packet is not fragmented.
 */
#ifdef HAVE_SKB_GSO_UDP
		if (nh->frag_off & htons(IP_MF) ||
			skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
#else
		if (nh->frag_off & htons(IP_MF))
#endif
			key->ip.frag = OVS_FRAG_TYPE_FIRST;
		else
			key->ip.frag = OVS_FRAG_TYPE_NONE;

ARP或RARP

/*
 * Fragment of key_extract(): ARP/RARP branch. When the header passes
 * arphdr_ok() and describes Ethernet hardware addresses with 4-byte IP
 * protocol addresses, the opcode and the sender/target addresses are copied
 * into the key; otherwise key->ip and key->ipv4 are zeroed.
 */
else if (key->eth.type == htons(ETH_P_ARP) ||
		   key->eth.type == htons(ETH_P_RARP)) {
		struct arp_eth_header *arp;
		bool arp_available = arphdr_ok(skb);

		arp = (struct arp_eth_header *)skb_network_header(skb);

		/* 'arp' is only dereferenced if arp_available is true, because
		 * of short-circuit evaluation.
		 */
		if (arp_available &&
		    arp->ar_hrd == htons(ARPHRD_ETHER) &&
		    arp->ar_pro == htons(ETH_P_IP) &&
		    arp->ar_hln == ETH_ALEN &&
		    arp->ar_pln == 4) {

			/* We only match on the lower 8 bits of the opcode. */
			if (ntohs(arp->ar_op) <= 0xff)
				key->ip.proto = ntohs(arp->ar_op);
			else
				key->ip.proto = 0;

			/* Sender/target IP and hardware addresses. */
			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
		} else {
			memset(&key->ip, 0, sizeof(key->ip));
			memset(&key->ipv4, 0, sizeof(key->ipv4));
		}
	} 

mpls

/*
 * Fragment of key_extract(): MPLS branch. Walks the label stack one entry
 * (MPLS_HLEN bytes) at a time, stores the first entry in key->mpls.top_lse,
 * and moves the inner network header past each entry until an entry with the
 * MPLS_LS_S_MASK bit set has been consumed.
 * NOTE(review): no check of the remaining packet length is visible inside the
 * loop in this excerpt.
 */
else if (eth_p_mpls(key->eth.type)) {
		size_t stack_len = MPLS_HLEN;

		skb_set_inner_network_header(skb, skb->mac_len);
		while (1) {
			__be32 lse;

			/* Read the current label stack entry. */
			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);

			/* Only true on the first iteration: this is the top entry. */
			if (stack_len == MPLS_HLEN)
				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);

			/* Step past the entry just read. */
			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
			if (lse & htonl(MPLS_LS_S_MASK))
				break;

			stack_len += MPLS_HLEN;
		}
	} 

ipv6

/*
 * Fragment of key_extract(): IPv6 branch. parse_ipv6hdr() fills the key and
 * returns the header length, or a negative error code. The branch's closing
 * brace is not shown in this excerpt.
 */
else if (key->eth.type == htons(ETH_P_IPV6)) {
		int nh_len;             /* IPv6 Header + Extensions */

		nh_len = parse_ipv6hdr(skb, key);
		if (unlikely(nh_len < 0)) {
			switch (nh_len) {
			case -EINVAL:
				/* Zero the IP fields and IPv6 addresses, then
				 * continue as for -EPROTO.
				 */
				memset(&key->ip, 0, sizeof(key->ip));
				memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
				/* fall-through */
			case -EPROTO:
				/* Not reported as an error: the transport
				 * header is set equal to the network header.
				 */
				skb->transport_header = skb->network_header;
				error = 0;
				break;
			default:
				/* Any other failure is passed to the caller. */
				error = nh_len;
			}
			return error;
		}

		/* Later fragment: stop parsing here. */
		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
			return 0;
#ifdef HAVE_SKB_GSO_UDP
		/* An SKB_GSO_UDP gso_type is treated as a first fragment. */
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			key->ip.frag = OVS_FRAG_TYPE_FIRST;

#endif

nsh

/*
 * Fragment of key_extract(): NSH branch. Parsing is delegated to parse_nsh();
 * a non-zero result is returned to the caller unchanged.
 */
else if (key->eth.type == htons(ETH_P_NSH)) {
		error = parse_nsh(skb, key);
		if (error)
			return error;
	}

传输层

TCP

/*
 * Fragments of key_extract(): TCP handling, shown once for IPv4 and once for
 * IPv6. If tcphdr_ok() accepts the header, the ports and TCP flags are copied
 * into key->tp; otherwise key->tp is zeroed.
 */
// IPv4: protocol field equals IPPROTO_TCP
if (key->ip.proto == IPPROTO_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->tp.src = tcp->source;
				key->tp.dst = tcp->dest;
				key->tp.flags = TCP_FLAGS_BE16(tcp);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}

		} 
// IPv6: next-header value equals NEXTHDR_TCP
if (key->ip.proto == NEXTHDR_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->tp.src = tcp->source;
				key->tp.dst = tcp->dest;
				key->tp.flags = TCP_FLAGS_BE16(tcp);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 

UDP

/*
 * Fragments of key_extract(): UDP handling, shown once for IPv4 and once for
 * IPv6. If udphdr_ok() accepts the header, the ports are copied into key->tp;
 * otherwise key->tp is zeroed.
 */
// IPv4: protocol field equals IPPROTO_UDP
else if (key->ip.proto == IPPROTO_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->tp.src = udp->source;
				key->tp.dst = udp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 
// IPv6: next-header value equals NEXTHDR_UDP
else if (key->ip.proto == NEXTHDR_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->tp.src = udp->source;
				key->tp.dst = udp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 

SCTP

/*
 * Fragments of key_extract(): SCTP handling, shown once for IPv4 and once for
 * IPv6. If sctphdr_ok() accepts the header, the ports are copied into key->tp;
 * otherwise key->tp is zeroed.
 */
// IPv4: protocol field equals IPPROTO_SCTP
else if (key->ip.proto == IPPROTO_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->tp.src = sctp->source;
				key->tp.dst = sctp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 
// IPv6: next-header value equals NEXTHDR_SCTP
else if (key->ip.proto == NEXTHDR_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->tp.src = sctp->source;
				key->tp.dst = sctp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 

ICMP

/*
 * Fragments of key_extract(): ICMP handling. For IPv4 the ICMP type and code
 * are stored in key->tp.src and key->tp.dst; for IPv6 the work is delegated
 * to parse_icmpv6(). If the header check fails, key->tp is zeroed.
 */
// IPv4: protocol field equals IPPROTO_ICMP
else if (key->ip.proto == IPPROTO_ICMP) {
			if (icmphdr_ok(skb)) {
				struct icmphdr *icmp = icmp_hdr(skb);
				/* The ICMP type and code fields use the 16-bit
				 * transport port fields, so we need to store
				 * them in 16-bit network byte order.
				 */
				key->tp.src = htons(icmp->type);
				key->tp.dst = htons(icmp->code);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		}
// IPv6: next-header value equals NEXTHDR_ICMP
else if (key->ip.proto == NEXTHDR_ICMP) {
			if (icmp6hdr_ok(skb)) {
				/* A non-zero result from parse_icmpv6() is
				 * returned to the caller unchanged.
				 */
				error = parse_icmpv6(skb, key, nh_len);
				if (error)
					return error;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		}

ct层:

/*
 * Reset every connection-tracking member of 'key' that this function covers
 * (state, zone, mark, labels and original-direction protocol) to zero.
 * 'skb' is not used.
 */
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
				   struct sw_flow_key *key)
{
	/* A zero 'ct_orig_proto' marks the original direction key fields
	 * as non-existent.
	 */
	key->ct_orig_proto = 0;

	memset(&key->ct.labels, 0, sizeof key->ct.labels);
	key->ct.mark = 0;

	key->ct_zone = 0;
	key->ct_state = 0;
}

总结:

各层的协议分析一般均按以下几个步骤:

1、声明一个指向该层协议头结构体的指针(例如 struct iphdr *nh、struct tcphdr *tcp)。

2、调用api从skb中获取该协议头。

3、根据协议头中的字段,将各字段拷贝到sw_flow_key中。