1 问题
# ls /sys/class/net/eth1/device/virtfn2/net/
dev8
# ip link show eth1
2: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP qlen 1000
link/ether 8c:dc:d4:b1:60:c0 brd ff:ff:ff:ff:ff:ff
vf 0 MAC 14:05:0a:f5:ac:36, vlan 3
vf 1 MAC 14:05:0a:f5:ac:3a, vlan 3
vf 2 MAC 14:05:0a:f5:ac:3e, vlan 3
vf 3 MAC 14:05:0a:f5:ac:42, vlan 3
vf 4 MAC 14:05:0a:f5:ac:46, vlan 3
vf 5 MAC 00:00:00:00:00:00
vf 6 MAC 00:00:00:00:00:00
# ip link show dev8
8: dev8: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN qlen 1000
link/ether 14:05:0a:f5:ac:3e brd ff:ff:ff:ff:ff:ff
直接设置VF设备dev8的MAC返回错误:
# ip link set dev8 address 14:05:00:f5:ac:3e
RTNETLINK answers: Cannot assign requested address
# dmesg
[682286.034307] igb 0000:03:00.0: VF 2 attempted to override administratively set MAC address
[682286.034307] Reload the VF driver to resume operations
通过PF设置VF的MAC没有返回错误:
# ip link set eth1 vf 2 mac 14:05:00:f5:ac:3e
# dmesg
[682350.583348] igb 0000:03:00.0: setting MAC 14:05:00:f5:ac:3e on VF 2
[682350.583351] igb 0000:03:00.0: Reload the VF driver to make this change effective.
# ip link show eth1
2: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP qlen 1000
link/ether 8c:dc:d4:b1:60:c0 brd ff:ff:ff:ff:ff:ff
vf 0 MAC 14:05:0a:f5:ac:36, vlan 3
vf 1 MAC 14:05:0a:f5:ac:3a, vlan 3
vf 2 MAC 14:05:00:f5:ac:3e, vlan 3
...
# ip link show dev8
8: dev8: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN qlen 1000
link/ether 14:05:0a:f5:ac:3e brd ff:ff:ff:ff:ff:ff
但是,新的MAC地址的确写到了PF的配置,但没有写到VF网络设备。
这里有2个问题:
(1)为什么不能通过第一种方式直接设置VF网络设备的MAC地址?
(2)通过第二种方式设置VF的MAC地址后,为什么不能反映到VF网络设备?
2 原因
先看看两者的区别与实现:
2.1 ip link set dev $VFDEV address $MAC
- VF端
最终会到VF的驱动igb/igbvf/netdev.c
/**
* igbvf_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
* @p: pointer to an address structure
*
* Returns 0 on success, negative on failure
**/
static int igbvf_set_mac(struct net_device *netdev, void *p)
{
struct igbvf_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
struct sockaddr *addr = p;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
hw->mac.ops.rar_set(hw, hw->mac.addr, 0); ///e1000_rar_set_vf
if (memcmp(addr->sa_data, hw->mac.addr, 6))
return -EADDRNOTAVAIL;
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
return 0;
}
在到MAC地址拷贝到net_device->dev_addr之前,会调用e1000_rar_set_vf
,向PF发送E1000_VF_SET_MAC_ADDR
消息
/**
* e1000_rar_set_vf - set device MAC address
* @hw: pointer to the HW structure
* @addr: pointer to the receive address
* @index: receive address array register
**/
static void e1000_rar_set_vf(struct e1000_hw *hw, u8 * addr, u32 index)
{
struct e1000_mbx_info *mbx = &hw->mbx;
u32 msgbuf[3];
u8 *msg_addr = (u8 *)(&msgbuf[1]);
s32 ret_val;
memset(msgbuf, 0, 12);
msgbuf[0] = E1000_VF_SET_MAC_ADDR;
memcpy(msg_addr, addr, 6);
ret_val = mbx->ops.write_posted(hw, msgbuf, 3);
if (!ret_val)
ret_val = mbx->ops.read_posted(hw, msgbuf, 3); ///e1000_read_posted_mbx
msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS;
/* if nacked the address was rejected, use "perm_addr" */
if (!ret_val &&
(msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK)))
e1000_read_mac_addr_vf(hw);
}
如果PF返回NACK(E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK),则使用perm_addr : |
/**
* e1000_read_mac_addr_vf - Read device MAC address
* @hw: pointer to the HW structure
**/
static s32 e1000_read_mac_addr_vf(struct e1000_hw *hw)
{
memcpy(hw->mac.addr, hw->mac.perm_addr, ETH_ALEN);
return E1000_SUCCESS;
}
- PF端
当PF收到VF的
E1000_VF_SET_MAC_ADDR
消息时,如果没有设置过IGB_VF_FLAG_PF_SET_MAC
标志,则更新PF驱动保存的有关VF的MAC信息;
static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
///...
retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
switch ((msgbuf[0] & 0xFFFF)) {
case E1000_VF_SET_MAC_ADDR:
retval = -EINVAL;
if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
else
dev_warn(&pdev->dev,
"VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
vf);
break;
msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
/* notify the VF of the results of what it sent us */
if (retval)
msgbuf[0] |= E1000_VT_MSGTYPE_NACK; ///PF更新MAC失败
else
msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
igb_write_mbx(hw, msgbuf, 1, vf);
}
当PF更新MAC失败或者标志位IGB_VF_FLAG_PF_SET_MAC
设置时,会给VF返回E1000_VT_MSGTYPE_NACK
消息。
igb_set_vf_mac_addr
直接调用igb_set_vf_mac
:
static int igb_set_vf_mac(struct igb_adapter *adapter,
int vf, unsigned char *mac_addr)
{
struct e1000_hw *hw = &adapter->hw;
/* VF MAC addresses start at end of receive addresses and moves
* towards the first, as a result a collision should not be possible
*/
int rar_entry = hw->mac.rar_entry_count - (vf + 1);
memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
return 0;
}
2.2 ip link set dev eth1 vf 2 mac $MAC
当通过PF去设置VF的MAC地址时,内核会通过PF的驱动函数igb_ndo_set_vf_mac
更新PF驱动中文保存的VF的MAC信息igb_adapter->vf_data[vf]
,并设置IGB_VF_FLAG_PF_SET_MAC
标志,然后直接调用igb_set_vf_mac
。
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
struct igb_adapter *adapter = netdev_priv(netdev);
if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
return -EINVAL;
adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
dev_info(&adapter->pdev->dev,
"Reload the VF driver to make this change effective.");
if (test_bit(__IGB_DOWN, &adapter->state)) {
dev_warn(&adapter->pdev->dev,
"The VF MAC address has been set, but the PF device is not up.\n");
dev_warn(&adapter->pdev->dev,
"Bring the PF device up before attempting to use the VF device.\n");
}
return igb_set_vf_mac(adapter, vf, mac);
}
到这里基本上明白了第一方式设置mac地址失败的原因了,因为一旦通过第二种方式设置了VF的MAC地址,就会设置 IGB_VF_FLAG_PF_SET_MAC
标示位,就能再使用第一种方式了。
下面继续讨论第二个问题。从igb_ndo_set_vf_mac
的提示可以看到,当我们通过PF去设置VF的MAC的时候,需要Reload the VF driver to make this change effective.
。
难道要得重新加载VF驱动,如果是这样的话,会对所有的VF都有影响。实际上,VF驱动在加载的时候,的确会从PF的配置读取VF的MAC信息,然后设置VF:
static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
///...
/*reset the controller to put the device in a known good state */
err = hw->mac.ops.reset_hw(hw);
if (err) {
dev_info(&pdev->dev,
"PF still in reset state. Is the PF interface up?\n");
} else {
err = hw->mac.ops.read_mac_addr(hw); ///read MAC from PF
if (err)
dev_info(&pdev->dev, "Error reading MAC address.\n");
else if (is_zero_ether_addr(adapter->hw.mac.addr))
dev_info(&pdev->dev, "MAC address not assigned by administrator.\n");
memcpy(netdev->dev_addr, adapter->hw.mac.addr, ///set MAC address
netdev->addr_len);
}
///...
}
此外,VF驱动函数igbvf_reset
也会设置网络设备地址net_device->dev_addr
:
static void igbvf_reset(struct igbvf_adapter *adapter)
{
struct e1000_mac_info *mac = &adapter->hw.mac;
struct net_device *netdev = adapter->netdev;
struct e1000_hw *hw = &adapter->hw;
/* Allow time for pending master requests to run */
if (mac->ops.reset_hw(hw)) ///e1000_reset_hw_vf
dev_err(&adapter->pdev->dev, "PF still resetting\n");
mac->ops.init_hw(hw);///e1000_init_hw_vf
if (is_valid_ether_addr(adapter->hw.mac.addr)) {
memcpy(netdev->dev_addr, adapter->hw.mac.addr, ///set net_device MAC
netdev->addr_len);
memcpy(netdev->perm_addr, adapter->hw.mac.addr,
netdev->addr_len);
}
adapter->last_reset = jiffies;
}
可以,VF驱动会用adapter->hw.mac.addr
的值,该值从哪里获取?
实际上reset_hw
,即e1000_reset_hw_vf
会向PF发送E1000_VF_RESET
消息,PF会返回MAC信息,VF读取然后保存在hw->mac.perm_addr
:
static s32 e1000_reset_hw_vf(struct e1000_hw *hw)
{
if (timeout) {
/* mailbox timeout can now become active */
mbx->timeout = E1000_VF_MBX_INIT_TIMEOUT;
/* notify pf of vf reset completion */
msgbuf[0] = E1000_VF_RESET;
mbx->ops.write_posted(hw, msgbuf, 1);
msleep(10);
/* set our "perm_addr" based on info provided by PF */
ret_val = mbx->ops.read_posted(hw, msgbuf, 3);
if (!ret_val) {
if (msgbuf[0] == (E1000_VF_RESET | E1000_VT_MSGTYPE_ACK))
memcpy(hw->mac.perm_addr, addr, 6); ///保存MAC
else
ret_val = -E1000_ERR_MAC_INIT;
}
}
init_hw
,即e1000_init_hw_vf
会尝试直接使用发送E1000_VF_SET_MAC_ADDR
,PF当然返回E1000_VT_MSGTYPE_NACK
。
static s32 e1000_init_hw_vf(struct e1000_hw *hw)
{
/* attempt to set and restore our mac address */
e1000_rar_set_vf(hw, hw->mac.addr, 0); ///上面已经分析
return E1000_SUCCESS;
}
此时,VF就会使用前面的hw->mac.perm_addr
覆盖hw->mac.addr
,到这里,hw->mac.addr
就保存从PF获取的VF的MAC信息。
最后,最重要的一点,igbvf_reset
什么时候会被调用?
实际上上,igbvf_down
会调用igbvf_reset
:
void igbvf_down(struct igbvf_adapter *adapter)
{
///...
igbvf_reset(adapter);
igbvf_clean_tx_ring(adapter->tx_ring);
igbvf_clean_rx_ring(adapter->rx_ring);
}
这意味着,我们只需要将VF shutdown,我们通过PF给VF设置的MAC信息就会反映到VF网络设备:
# ip link set dev8 up ##由于VF处于down状态,需要先将其UP
# ip link show dev8
8: dev8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 14:05:0a:f5:ac:3e brd ff:ff:ff:ff:ff:ff
# ip link show eth1
2: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP qlen 1000
link/ether 8c:dc:d4:b1:60:c0 brd ff:ff:ff:ff:ff:ff
vf 0 MAC 14:05:0a:f5:ac:36, vlan 3
vf 1 MAC 14:05:0a:f5:ac:3a, vlan 3
vf 2 MAC 14:05:00:f5:ac:3e, vlan 3
...
# ip link set dev8 down
# ip link show dev8
8: dev8: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN qlen 1000
link/ether 14:05:00:f5:ac:3e brd ff:ff:ff:ff:ff:ff
可以看到dev8
的地址从14:05:0a:f5:ac:3e
变成了14:05:00:f5:ac:3e
。
down对应的dmesg信息:
[699929.948823] igb 0000:03:00.0: VF 2 attempted to override administratively set MAC address
[699929.948823] Reload the VF driver to resume operations
[699929.950056] igb 0000:03:00.0: VF 2 attempted to override administratively set VLAN tag
[699929.950056] Reload the VF driver to resume operations
[699929.950539] igbvf 0000:03:11.0: Failed to remove vlan id 0
[699929.950543] failed to kill vid 0081/0 for device dev8
update at 2019-03-26
实际上第二种方式设置MAC是更安全的方式:
User can use IPROUTE2 utility to assign a unique MAC address to a VF from within the host Operating System. Once the new MAC address is assigned, VM that has this particular VF assigned to will not be able to alter its MAC address. This is called “administratively assigned MAC” and is a security feature.
参考这里.
3 总结
通过PF设置VF的MAC后,需要重启VF网络设备,VF才能同步到PF的MAC信息。