OpenvSwitch key模块详解
程序员文章站
2022-07-07 14:18:16
...
/*
 * Flow key extracted from a packet.  In this listing the fields are
 * grouped by protocol layer for presentation.
 *
 * NOTE(review): some inline remarks carried over with the fields describe
 * field placement ("Safe when right after 'tun_key'" on 'phy', "Filling a
 * hole of two bytes" before 'ct_state').  In this listing 'phy' follows
 * 'tun_opts_len' rather than 'tun_key', so confirm those remarks against
 * the authoritative definition before relying on the layout shown here.
 */
struct sw_flow_key {
	/* ---- IP tunnel layer ---- */
	u8 tun_proto;			/* Protocol of encapsulating tunnel. */
	struct ip_tunnel_key tun_key;	/* Encapsulating tunnel key. */
	u8 tun_opts[255];		/* Tunnel option bytes. */
	u8 tun_opts_len;		/* Length of the tunnel options. */

	/* ---- Physical layer ---- */
	struct {
		u32 priority;		/* Packet QoS priority. */
		u32 skb_mark;		/* SKB mark. */
		u16 in_port;		/* Input switch port (or DP_MAX_PORTS). */
	} __packed phy;			/* Safe when right after 'tun_key'. */

	/* ---- Link layer ---- */
	u32 ovs_flow_hash;		/* Datapath computed hash value. */
	u8 mac_proto;			/* MAC layer protocol (e.g. Ethernet). */
	u32 recirc_id;			/* Recirculation ID. */
	struct {
		u8 src[ETH_ALEN];	/* Ethernet source address. */
		u8 dst[ETH_ALEN];	/* Ethernet destination address. */
		struct vlan_head vlan;
		struct vlan_head cvlan;
		__be16 type;		/* Ethernet frame type. */
	} eth;

	/* ---- Network layer ---- */
	union {
		struct {
			__be32 top_lse;	/* top label stack entry */
		} mpls;
		struct {
			u8 proto;	/* IP protocol or lower 8 bits of ARP opcode. */
			u8 tos;		/* IP ToS. */
			u8 ttl;		/* IP TTL/hop limit. */
			u8 frag;	/* One of OVS_FRAG_TYPE_*. */
		} ip;
	};
	union {
		struct {
			struct {
				__be32 src;	/* IP source address. */
				__be32 dst;	/* IP destination address. */
			} addr;
			union {
				struct {
					__be32 src;
					__be32 dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
				} arp;
			};
		} ipv4;
		struct {
			struct {
				struct in6_addr src;	/* IPv6 source address. */
				struct in6_addr dst;	/* IPv6 destination address. */
			} addr;
			__be32 label;			/* IPv6 flow label. */
			union {
				struct {
					struct in6_addr src;
					struct in6_addr dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					struct in6_addr target;	/* ND target address. */
					u8 sll[ETH_ALEN];	/* ND source link layer address. */
					u8 tll[ETH_ALEN];	/* ND target link layer address. */
				} nd;
			};
		} ipv6;
		struct ovs_key_nsh nsh;		/* network service header */
	};

	/* ---- Transport layer ---- */
	struct {
		__be16 src;		/* TCP/UDP/SCTP source port. */
		__be16 dst;		/* TCP/UDP/SCTP destination port. */
		__be16 flags;		/* TCP flags. */
	} tp;

	/* ---- Connection tracking (ct) layer ---- */
	/* Filling a hole of two bytes. */
	u8 ct_state;
	u16 ct_zone;			/* Conntrack zone. */
	u8 ct_orig_proto;		/* CT original direction tuple IP protocol. */
	struct {
		/* Connection tracking fields not packed above. */
		struct {
			__be16 src;	/* CT orig tuple tp src port. */
			__be16 dst;	/* CT orig tuple tp dst port. */
		} orig_tp;
		u32 mark;
		struct ovs_key_ct_labels labels;
	} ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
struct sw_flow_key结构比较复杂,下面用层次化的图形将其表示得更清晰一些。
struct sw_flow_key
IP隧道层:
物理层
链路层
网络层
传输层
ct层:
sw_flow_key解析流程
ip 隧道层处理
/*
 * Excerpt of ovs_flow_key_extract(): the part that fills the tunnel fields
 * of 'key' from 'tun_info'.  The listing is abridged; the rest of the
 * function (including its return statement) is not shown in this excerpt.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
if (tun_info) {
// Record the tunnel protocol as reported by ip_tunnel_info_af()
// (presumably the tunnel's address family -- confirm against the helper).
key->tun_proto = ip_tunnel_info_af(tun_info);
// Copy the tunnel key into the flow key.
memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
// Compile-time check: the largest value 'options_len' can represent must
// not exceed the size of key->tun_opts.
BUILD_BUG_ON(((1 << (sizeof(tun_info->options_len) * 8)) - 1) >
sizeof(key->tun_opts));
// Copy the tunnel options (option data, not a set of functions) into the
// key and record their length in tun_opts_len.
if (tun_info->options_len) {
ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
tun_info);
key->tun_opts_len = tun_info->options_len;
} else {
key->tun_opts_len = 0;
}
} else {
// No IP tunnel in use: zero the tunnel protocol, options length and key.
key->tun_proto = 0;
key->tun_opts_len = 0;
memset(&key->tun_key, 0, sizeof(key->tun_key));
}
}
物理层处理
/*
 * Excerpt of ovs_flow_key_extract(): the part that fills the packet
 * metadata fields of 'key' and then hands off to key_extract().
 * The listing is abridged: the declarations of 'res' and 'err' and the
 * final return statement are not shown in this excerpt.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
// Packet priority.
key->phy.priority = skb->priority;
// Input port: port number of the vport the packet was received on.
key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
key->phy.skb_mark = skb->mark;
key->ovs_flow_hash = 0;
// Determine the MAC-layer protocol (it depends on skb->dev->type) in
// preparation for link-layer parsing; a negative result is an error code.
res = key_extract_mac_proto(skb);
if (res < 0)
return res;
key->mac_proto = res;
key->recirc_id = 0;
// Parse the link, network and transport layers.
err = key_extract(skb, key);
}
key_extract_mac_proto
/*
 * Work out which MAC-layer protocol a packet uses from the type of the
 * device it is attached to.
 *
 * Returns MAC_PROTO_ETHERNET for ARPHRD_ETHER devices, and for ARPHRD_NONE
 * devices when skb->protocol is ETH_P_TEB; returns MAC_PROTO_NONE for any
 * other packet on an ARPHRD_NONE device.  Any other device type triggers a
 * one-time warning and yields -EINVAL.
 */
static int key_extract_mac_proto(struct sk_buff *skb)
{
	if (skb->dev->type == ARPHRD_ETHER)
		return MAC_PROTO_ETHERNET;

	if (skb->dev->type == ARPHRD_NONE) {
		/* Only ETH_P_TEB traffic carries an Ethernet header here. */
		return skb->protocol == htons(ETH_P_TEB) ? MAC_PROTO_ETHERNET
							  : MAC_PROTO_NONE;
	}

	/* Unsupported device type. */
	WARN_ON_ONCE(1);
	return -EINVAL;
}
链路层、网络层、传输层的代码分析需要 sk_buff 处理基础。教程可参考:
https://blog.csdn.net/dog250/article/details/43637053
链路层处理
/*
 * Excerpt of key_extract(): the link-layer part, which records the MAC and
 * network header positions in the skb and fills key->eth.
 * The listing is abridged; 'error' is not used and no return statement is
 * shown in this excerpt.
 */
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
int error;
struct ethhdr *eth;
/* At this point skb->data points exactly at the start of the MAC header,
 * so skb_reset_mac_header() can be called directly to record the MAC
 * header position.  This is a precondition on the skb passed to
 * key_extract().
 */
skb_reset_mac_header(skb);
clear_vlan(key);
// No link-layer header present.
if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
skb_reset_network_header(skb);// record the network header position directly
key->eth.type = skb->protocol;
} else {
eth = eth_hdr(skb);// get the MAC header
// Copy the source and destination MAC addresses into the flow key.
ether_addr_copy(key->eth.src, eth->h_source);
ether_addr_copy(key->eth.dst, eth->h_dest);
// Advance skb->data past the two MAC addresses (2 * ETH_ALEN bytes) so
// that it points at the EtherType field that follows them.
__skb_pull(skb, 2 * ETH_ALEN);
// Determine the EtherType, in preparation for network-layer parsing
// (parse_ethertype() presumably advances skb->data further -- confirm).
key->eth.type = parse_ethertype(skb);
// Record the start of the network-layer header.
skb_reset_network_header(skb);
// Restore skb->data so that it points at the start of the MAC header again.
__skb_push(skb, skb->data - skb_mac_header(skb));
}
// Record the MAC header length in the skb (skb_reset_mac_len()).
skb_reset_mac_len(skb);
}
ethhdr
/* Ethernet header: destination address, source address, protocol type. */
struct ethhdr {
	unsigned char h_dest[ETH_ALEN];		/* Destination MAC address. */
	unsigned char h_source[ETH_ALEN];	/* Source MAC address. */
	__u16 h_proto;				/* Protocol type used by the network layer. */
} __attribute__((packed));	/* Tells the compiler not to insert padding into this struct. */
网络层
ipv4
/* Excerpt: IPv4 branch of the network-layer parsing.  The listing is
 * abridged -- the value stored in 'error' is not examined here and the
 * closing brace of this branch is not shown.
 */
if (key->eth.type == htons(ETH_P_IP)) {
struct iphdr *nh;
__be16 offset;
error = check_iphdr(skb);
nh = ip_hdr(skb);
/* Copy addresses, protocol, ToS and TTL from the IP header into the key. */
key->ipv4.addr.src = nh->saddr;
key->ipv4.addr.dst = nh->daddr;
key->ip.proto = nh->protocol;
key->ip.tos = nh->tos;
key->ip.ttl = nh->ttl;
// Fragment offset: a non-zero offset marks a later (non-first) fragment,
// for which parsing stops here.
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
return 0;
}
// Fragment handling: offset is zero, so this is either a first fragment
// (IP_MF set, or a UDP GSO skb when HAVE_SKB_GSO_UDP is defined) or an
// unfragmented packet.
#ifdef HAVE_SKB_GSO_UDP
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
#else
if (nh->frag_off & htons(IP_MF))
#endif
key->ip.frag = OVS_FRAG_TYPE_FIRST;
else
key->ip.frag = OVS_FRAG_TYPE_NONE;
ARP或RARP
/* Excerpt: ARP/RARP branch of the network-layer parsing. */
else if (key->eth.type == htons(ETH_P_ARP) ||
key->eth.type == htons(ETH_P_RARP)) {
struct arp_eth_header *arp;
bool arp_available = arphdr_ok(skb);
arp = (struct arp_eth_header *)skb_network_header(skb);
/* Only Ethernet/IPv4 ARP is parsed: hardware type ARPHRD_ETHER, protocol
 * ETH_P_IP, hardware address length ETH_ALEN, protocol address length 4.
 * 'arp' is only dereferenced when arphdr_ok() succeeded.
 */
if (arp_available &&
arp->ar_hrd == htons(ARPHRD_ETHER) &&
arp->ar_pro == htons(ETH_P_IP) &&
arp->ar_hln == ETH_ALEN &&
arp->ar_pln == 4) {
/* We only match on the lower 8 bits of the opcode. */
if (ntohs(arp->ar_op) <= 0xff)
key->ip.proto = ntohs(arp->ar_op);
else
key->ip.proto = 0;
/* Sender/target IP and hardware addresses go into the ipv4 part of the key. */
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
} else {
/* Header missing or not Ethernet/IPv4 ARP: clear the ip and ipv4 fields. */
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
}
mpls
/* Excerpt: MPLS branch of the network-layer parsing.  Walks the label
 * stack, stores the top label stack entry in the key, and leaves the skb's
 * inner network header just past the entry that has the bottom-of-stack
 * bit set.
 * NOTE(review): no length check before reading each entry is visible in
 * this excerpt -- confirm against the full source.
 */
else if (eth_p_mpls(key->eth.type)) {
size_t stack_len = MPLS_HLEN;
skb_set_inner_network_header(skb, skb->mac_len);
while (1) {
__be32 lse;
/* Read the current label stack entry. */
memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
/* First iteration only: remember the top entry in the key. */
if (stack_len == MPLS_HLEN)
memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
/* Advance the inner network header past the entries seen so far. */
skb_set_inner_network_header(skb, skb->mac_len + stack_len);
/* Stop once the bottom-of-stack bit is set. */
if (lse & htonl(MPLS_LS_S_MASK))
break;
stack_len += MPLS_HLEN;
}
}
ipv6
/* Excerpt: IPv6 branch of the network-layer parsing.  The listing is
 * abridged -- the closing brace of this branch is not shown.
 */
else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
nh_len = parse_ipv6hdr(skb, key);
/* A negative nh_len is an error code from parse_ipv6hdr(). */
if (unlikely(nh_len < 0)) {
switch (nh_len) {
case -EINVAL:
/* Clear the IP fields and IPv6 addresses, then continue as for -EPROTO. */
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
/* fall-through */
case -EPROTO:
/* Point the transport header at the network header and report success. */
skb->transport_header = skb->network_header;
error = 0;
break;
default:
/* Any other error is passed back to the caller. */
error = nh_len;
}
return error;
}
/* Later fragments are not parsed any further. */
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
return 0;
#ifdef HAVE_SKB_GSO_UDP
/* A UDP GSO skb is treated as a first fragment. */
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
#endif
nsh
/* Excerpt: NSH branch of the network-layer parsing.  Parsing is delegated
 * to parse_nsh(); a non-zero result is returned to the caller as an error.
 */
else if (key->eth.type == htons(ETH_P_NSH)) {
error = parse_nsh(skb, key);
if (error)
return error;
}
传输层
TCP
/* Excerpts: TCP handling as it appears in the IPv4 path and in the IPv6
 * path, shown one after the other.  Both copy the source port, destination
 * port and TCP flags into key->tp when tcphdr_ok() succeeds, and zero
 * key->tp otherwise.
 */
// IPv4 path
if (key->ip.proto == IPPROTO_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
// IPv6 path
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
UDP
/* Excerpts: UDP handling in the IPv4 path and in the IPv6 path.  Both copy
 * the source and destination ports into key->tp when udphdr_ok() succeeds,
 * and zero key->tp otherwise.
 */
// IPv4 path
else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
// IPv6 path
else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
SCTP
/* Excerpts: SCTP handling in the IPv4 path and in the IPv6 path.  Both copy
 * the source and destination ports into key->tp when sctphdr_ok() succeeds,
 * and zero key->tp otherwise.
 */
// IPv4 path
else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
// IPv6 path
else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
ICMP
/* Excerpts: ICMP handling in the IPv4 path and in the IPv6 path.  The IPv4
 * path stores the ICMP type and code in key->tp; the IPv6 path delegates to
 * parse_icmpv6().  Either path zeroes key->tp when its header check fails.
 */
// IPv4 path
else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
 * transport port fields, so we need to store
 * them in 16-bit network byte order.
 */
key->tp.src = htons(icmp->type);
key->tp.dst = htons(icmp->code);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
// IPv6 path
else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
/* 'nh_len' is declared outside this excerpt; a non-zero result from
 * parse_icmpv6() is returned to the caller as an error.
 */
error = parse_icmpv6(skb, key, nh_len);
if (error)
return error;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
ct层:
/*
 * Reset every connection-tracking field of 'key' to zero.  'skb' is not
 * consulted by this implementation.
 */
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
				   struct sw_flow_key *key)
{
	/* A zero 'ct_orig_proto' signals that the original-direction key
	 * fields are absent.
	 */
	key->ct_orig_proto = 0;

	memset(&key->ct.labels, 0, sizeof(key->ct.labels));
	key->ct.mark = 0;

	key->ct_zone = 0;
	key->ct_state = 0;
}
总结:
各层的协议分析一般都按以下几个步骤进行:
1、定义一个该层协议头结构体。
2、调用 API 从 skb 中获取该协议头。
3、将协议头中的相关字段拷贝到 sw_flow_key 中。