這一篇藉由LDD(Linux Device Drivers)中的SNULL來了解最基本的Network Device Driver的架構,本章的sample code比原本的SNULL更為簡化,但是Network Topology是相同的,讓interface sn0/sn1可以透過遠方虛擬的remote0/remote1彼此溝通。
最基本的Network Device Driver的寫法就是allocate network device, "struct net_device"並且賦予hook function, "struct net_device_ops",然後將該network device註冊到kernel中,Kernel就可以調用該Network device,最基本的net_device_ops包含
- ndo_open() and ndo_validate_addr() are called when the NIC is brought up.
- ndo_stop() is called, when the NIC is shut down.
- ndo_start_xmit() is called, when a packet is sent from the NIC.
- ndo_change_mtu() is called, when the MTU of the NIC is changed.
- ndo_set_mac_address() is called, when the MAC address of the NIC is changed.
以下是demo code的net_device_ops部分
/*
 * Callbacks the network core invokes on this device. The address and MTU
 * hooks are thin logging wrappers around the generic Ethernet helpers.
 */
static const struct net_device_ops nic_netdev_ops = {
	/* interface up / down */
	.ndo_open		= nic_open,
	.ndo_stop		= nic_close,
	/* transmit path */
	.ndo_start_xmit		= nic_start_xmit,
	/* configuration */
	.ndo_validate_addr	= nic_validate_addr,
	.ndo_change_mtu		= nic_change_mtu,
	.ndo_set_mac_address	= nic_set_mac_addr,
};
我們是模擬ethernet,而ethernet有一些hook function可以用,如下
- ndo_validate_addr() -> eth_validate_addr().
- ndo_change_mtu() -> eth_change_mtu().
- ndo_set_mac_address() -> eth_mac_addr().
在demo code中,ndo_validate_addr()/ndo_change_mtu()/ndo_set_mac_address()我都是將其轉成ethernet的default hook function,我沒直接掛,是因為我想印出訊息來看
/*
 * ndo_validate_addr hook: log the call, then return whatever
 * eth_validate_addr() reports for @netdev.
 */
static int nic_validate_addr(struct net_device *netdev)
{
	struct nic_priv *np = netdev_priv(netdev);
	int rc;

	netif_info(np, drv, netdev, "%s(#%d), priv:%p\n", __func__, __LINE__, np);
	rc = eth_validate_addr(netdev);

	return rc;
}
/*
 * ndo_change_mtu hook: log the call, then return whatever
 * eth_change_mtu() reports for the requested @new_mtu.
 */
static int nic_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nic_priv *np = netdev_priv(netdev);
	int rc;

	netif_info(np, drv, netdev, "%s(#%d), priv:%p\n", __func__, __LINE__, np);
	rc = eth_change_mtu(netdev, new_mtu);

	return rc;
}
/*
 * ndo_set_mac_address hook: log the call, then return whatever
 * eth_mac_addr() reports; @addr is passed through untouched.
 */
static int nic_set_mac_addr(struct net_device *netdev, void *addr)
{
	struct nic_priv *np = netdev_priv(netdev);
	int rc;

	netif_info(np, drv, netdev, "%s(#%d), priv:%p\n", __func__, __LINE__, np);
	rc = eth_mac_addr(netdev, addr);

	return rc;
}
另外幾個比較重要的function是netif_start_queue()/netif_stop_queue()
- netif_start_queue()是通知上層,可以將資料送到該網卡,通常放在ndo_open()裡面
- netif_stop_queue()是通知上層,停止將資料送到該網卡,通常放在ndo_stop()裡面
由於沒有真正的remote0/remote1可以回應ARP,所以必須在sn0與sn1上設定IFF_NOARP這個flag,並且自行處理L2的header,因此必須再額外掛上"struct header_ops"。
以下為完整的driver,主要code都是印訊息觀察driver的call flow
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
/* Module metadata. */
MODULE_AUTHOR("Brook");
MODULE_DESCRIPTION("Kernel module for demo");
MODULE_LICENSE("GPL");
/*
 * Size in bytes of the per-interface transmit staging buffer allocated in
 * nic_open(); nic_start_xmit() drops any frame whose length is not below it.
 */
#define MAX_ETH_FRAME_SIZE 1792
/*
 * Per-interface private state, obtained from a net_device with netdev_priv().
 */
struct nic_priv {
	/* staging buffer for the frame being transmitted; allocated in nic_open() */
	unsigned char	*tx_buf;
	/* number of bytes of tx_buf that make up the current frame */
	unsigned int	tx_len;
	/* message-level mask consulted by the netif_info()/netif_err() calls */
	u32		msg_enable;
};
/* The two interfaces created by this module; each one's peer is the other slot. */
static struct net_device *nic_dev[2];
/* Initial nic_priv.msg_enable value, assigned to both interfaces in brook_init(). */
#define DEF_MSG_ENABLE 0xffff
static void dump(unsigned char *buf)
{
unsigned char *p, sbuf[2*(sizeof(struct ethhdr) + sizeof(struct iphdr))];
int i;
p = sbuf;
for(i = 0; i < sizeof(struct ethhdr); i++) {
p += sprintf(p, "%02X ", buf[i]);
}
printk("eth %s\n", sbuf);
p = sbuf;
for(i = 0; i < sizeof(struct iphdr); i++) {
p += sprintf(p, "%02X ", buf[sizeof(struct ethhdr) + i]);
}
printk("iph %s\n", sbuf);
p = sbuf;
for(i = 0; i < 4; i++) {
p += sprintf(p, "%02X ", buf[sizeof(struct ethhdr) + sizeof(struct iphdr) + i]);
}
printk("payload %s\n", sbuf);
}
static void
nic_rx(struct net_device *netdev, int len, unsigned char *buf)
{
struct sk_buff *skb;
struct nic_priv *priv = netdev_priv(netdev);
netif_info(priv, hw, netdev, "%s(#%d), rx:%d\n",
__func__, __LINE__, len);
skb = dev_alloc_skb(len + 2);
if (!skb) {
netif_err(priv, rx_err, netdev,
"%s(#%d), rx: low on mem - packet dropped\n",
__func__, __LINE__);
netdev->stats.rx_dropped++;
return;
}
skb_reserve(skb, 2);
memcpy(skb_put(skb, len), buf, len);
skb->dev = netdev;
skb->protocol = eth_type_trans(skb, netdev);
skb->ip_summed = CHECKSUM_UNNECESSARY;
netdev->stats.rx_packets++;
netdev->stats.rx_bytes += len;
netif_rx(skb);
}
/*
 * nic_open() - ndo_open hook, run when the interface is brought up.
 *
 * Allocates the MAX_ETH_FRAME_SIZE-byte transmit staging buffer and starts
 * the transmit queue.
 *
 * Return: 0 on success, -ENOMEM if the buffer cannot be allocated.
 */
static int nic_open(struct net_device *netdev)
{
	struct nic_priv *np = netdev_priv(netdev);
	unsigned char *staging;

	netif_info(np, ifup, netdev, "%s(#%d), priv:%p\n", __func__, __LINE__, np);

	staging = kmalloc(MAX_ETH_FRAME_SIZE, GFP_KERNEL);
	np->tx_buf = staging;
	if (!staging) {
		netif_info(np, ifup, netdev, "%s(#%d), cannot alloc tx buf\n",
			   __func__, __LINE__);
		return -ENOMEM;
	}

	netif_start_queue(netdev);

	return 0;
}
static int nic_close(struct net_device *netdev)
{
struct nic_priv *priv = netdev_priv(netdev);
netif_info(priv, ifdown, netdev, "%s(#%d), priv:%p\n",
__func__, __LINE__, priv);
netif_stop_queue(netdev);
return 0;
}
static void nic_hw_xmit(struct net_device *netdev)
{
struct nic_priv *priv = netdev_priv(netdev);
struct iphdr *iph;
u32 *saddr, *daddr;
struct in_device* in_dev;
struct in_ifaddr* if_info;
if (priv->tx_len < sizeof(struct ethhdr) + sizeof(struct iphdr)) {
netif_info(priv, hw, netdev, "%s(#%d), too short\n",
__func__, __LINE__);
return;
}
dump(priv->tx_buf);
iph = (struct iphdr *)(priv->tx_buf + sizeof(struct ethhdr));
saddr = &iph->saddr;
daddr = &iph->daddr;
netif_info(priv, hw, netdev, "%s(#%d), orig, src:%pI4, dst:%pI4, len:%d\n",
__func__, __LINE__, saddr, daddr, priv->tx_len);
in_dev = nic_dev[(netdev == nic_dev[0] ? 1 : 0)]->ip_ptr;
if (in_dev) {
if_info = in_dev->ifa_list;
for (if_info = in_dev->ifa_list; if_info; if_info=if_info->ifa_next) {
#if 0
#endif
*saddr = *daddr = if_info->ifa_address;
((u8 *)saddr)[3]++;
netif_info(priv, hw, netdev, "%s(#%d), new, src:%pI4, dst:%pI4\n",
__func__, __LINE__, saddr, daddr);
break;
}
if (!if_info) {
netdev->stats.tx_dropped++;
netif_info(priv, hw, netdev, "%s(#%d), drop packet\n",
__func__, __LINE__);
return;
}
}
iph->check = 0;
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
netdev->stats.tx_packets++;
netdev->stats.tx_bytes += priv->tx_len;
nic_rx(nic_dev[(netdev == nic_dev[0] ? 1 : 0)], priv->tx_len, priv->tx_buf);
}
/*
 * nic_start_xmit() - ndo_start_xmit hook.
 * @skb:    packet to send; always consumed here
 * @netdev: interface the packet is sent on
 *
 * Copies the packet into the staging buffer (zero-padding it to ETH_ZLEN when
 * shorter), frees the skb and passes the staged frame to nic_hw_xmit().
 * Packets of MAX_ETH_FRAME_SIZE bytes or more are freed and counted in
 * tx_dropped instead.
 *
 * Return: always NETDEV_TX_OK.
 */
static netdev_tx_t nic_start_xmit(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct nic_priv *np = netdev_priv(netdev);

	netif_info(np, drv, netdev, "%s(#%d), orig, src:%pI4, dst:%pI4\n",
		   __func__, __LINE__, &(ip_hdr(skb)->saddr), &(ip_hdr(skb)->daddr));

	np->tx_len = skb->len;

	/* Oversized packet: cannot be staged, so discard it. */
	if (unlikely(!(np->tx_len < MAX_ETH_FRAME_SIZE))) {
		dev_kfree_skb_any(skb);
		netdev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	/* Clear the minimum frame area first so short packets end in zeros. */
	if (np->tx_len < ETH_ZLEN) {
		memset(np->tx_buf, 0, ETH_ZLEN);
		np->tx_len = ETH_ZLEN;
	}

	skb_copy_and_csum_dev(skb, np->tx_buf);
	dev_kfree_skb_any(skb);

	nic_hw_xmit(netdev);

	return NETDEV_TX_OK;
}
/*
 * ndo_validate_addr hook: log the call, then return whatever
 * eth_validate_addr() reports for @netdev.
 */
static int nic_validate_addr(struct net_device *netdev)
{
	struct nic_priv *np = netdev_priv(netdev);
	int rc;

	netif_info(np, drv, netdev, "%s(#%d), priv:%p\n", __func__, __LINE__, np);
	rc = eth_validate_addr(netdev);

	return rc;
}
/*
 * ndo_change_mtu hook: log the call, then return whatever
 * eth_change_mtu() reports for the requested @new_mtu.
 */
static int nic_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nic_priv *np = netdev_priv(netdev);
	int rc;

	netif_info(np, drv, netdev, "%s(#%d), priv:%p\n", __func__, __LINE__, np);
	rc = eth_change_mtu(netdev, new_mtu);

	return rc;
}
/*
 * ndo_set_mac_address hook: log the call, then return whatever
 * eth_mac_addr() reports; @addr is passed through untouched.
 */
static int nic_set_mac_addr(struct net_device *netdev, void *addr)
{
	struct nic_priv *np = netdev_priv(netdev);
	int rc;

	netif_info(np, drv, netdev, "%s(#%d), priv:%p\n", __func__, __LINE__, np);
	rc = eth_mac_addr(netdev, addr);

	return rc;
}
/*
 * snull_header() - header_ops.create hook: prepend an Ethernet header to @skb.
 * @skb:    packet being built; ETH_HLEN bytes are pushed onto its front
 * @netdev: sending interface
 * @type:   protocol number, stored in network byte order
 * @daddr:  ignored; the destination is always the other interface's address
 * @saddr:  source hardware address, or NULL to use @netdev's own
 * @len:    unused
 *
 * Return: @netdev's hard_header_len.
 */
int snull_header(struct sk_buff *skb, struct net_device *netdev,
		 unsigned short type, const void *daddr, const void *saddr,
		 unsigned len)
{
	struct nic_priv *np = netdev_priv(netdev);
	struct ethhdr *hdr = (struct ethhdr *)skb_push(skb, ETH_HLEN);
	struct net_device *peer;
	const void *src;

	netif_info(np, drv, netdev, "%s(#%d)\n", __func__, __LINE__);

	/* The frame is always addressed to whichever interface is not the sender. */
	if (netdev == nic_dev[0])
		peer = nic_dev[1];
	else
		peer = nic_dev[0];

	src = saddr ? saddr : netdev->dev_addr;

	hdr->h_proto = htons(type);
	memcpy(hdr->h_source, src, netdev->addr_len);
	memcpy(hdr->h_dest, peer->dev_addr, peer->addr_len);

	return netdev->hard_header_len;
}
/* Link-layer header hooks; only header construction is provided. */
static const struct header_ops snull_header_ops = {
	.create	= snull_header,
};
/*
 * Callbacks the network core invokes on this device. The address and MTU
 * hooks are thin logging wrappers around the generic Ethernet helpers.
 */
static const struct net_device_ops nic_netdev_ops = {
	/* interface up / down */
	.ndo_open		= nic_open,
	.ndo_stop		= nic_close,
	/* transmit path */
	.ndo_start_xmit		= nic_start_xmit,
	/* configuration */
	.ndo_validate_addr	= nic_validate_addr,
	.ndo_change_mtu		= nic_change_mtu,
	.ndo_set_mac_address	= nic_set_mac_addr,
};
/*
 * nic_alloc_netdev() - allocate and configure one Ethernet-style interface.
 *
 * The device gets a random hardware address, this driver's netdev and header
 * hooks, the IFF_NOARP flag and the NETIF_F_HW_CSUM feature. It is not
 * registered here; the caller owns it and releases it with free_netdev().
 *
 * Return: the new device, or NULL if allocation failed.
 */
static struct net_device* nic_alloc_netdev(void)
{
	struct net_device *dev = alloc_etherdev(sizeof(struct nic_priv));

	if (dev == NULL) {
		pr_err("%s(#%d): alloc dev failed",
		       __func__, __LINE__);
		return NULL;
	}

	/* Hook tables first, then addressing and flags. */
	dev->netdev_ops = &nic_netdev_ops;
	dev->header_ops = &snull_header_ops;

	eth_hw_addr_random(dev);
	dev->flags |= IFF_NOARP;
	dev->features |= NETIF_F_HW_CSUM;

	return dev;
}
/*
 * brook_init() - module entry point: create and register both interfaces.
 *
 * Both devices are allocated and their private data initialised before either
 * is registered, because a device can be opened as soon as register_netdev()
 * returns. On any failure everything set up so far is undone.
 *
 * Return: 0 on success, -ENOMEM if a device cannot be allocated, or the error
 * returned by register_netdev().
 */
static int __init brook_init(void)
{
	struct nic_priv *priv;
	int ret;
	int i;

	nic_dev[0] = nic_alloc_netdev();
	if (!nic_dev[0]) {
		pr_err("%s(#%d): alloc netdev[0] failed", __func__, __LINE__);
		return -ENOMEM;
	}

	nic_dev[1] = nic_alloc_netdev();
	if (!nic_dev[1]) {
		pr_err("%s(#%d): alloc netdev[1] failed", __func__, __LINE__);
		ret = -ENOMEM;
		goto alloc_2nd_failed;
	}

	/* Private state must be ready before the devices become visible. */
	for (i = 0; i < ARRAY_SIZE(nic_dev); i++) {
		priv = netdev_priv(nic_dev[i]);
		priv->msg_enable = DEF_MSG_ENABLE;
	}

	ret = register_netdev(nic_dev[0]);
	if (ret) {
		pr_err("%s(#%d): reg net driver failed. ret:%d",
		       __func__, __LINE__, ret);
		goto reg1_failed;
	}

	ret = register_netdev(nic_dev[1]);
	if (ret) {
		pr_err("%s(#%d): reg net driver failed. ret:%d",
		       __func__, __LINE__, ret);
		goto reg2_failed;
	}

	return 0;

reg2_failed:
	/* Only the first device was registered. */
	unregister_netdev(nic_dev[0]);
reg1_failed:
	free_netdev(nic_dev[1]);
alloc_2nd_failed:
	free_netdev(nic_dev[0]);
	return ret;
}
module_init(brook_init);
/*
 * brook_exit() - module exit point: remove both interfaces.
 *
 * Each interface's transmit path dereferences the other interface's
 * net_device, so both are unregistered before either is freed. Freeing one
 * while the other is still registered would leave a window in which the
 * survivor could touch freed memory.
 */
static void __exit brook_exit(void)
{
	int i;

	pr_info("%s(#%d): remove module", __func__, __LINE__);

	for (i = 0; i < ARRAY_SIZE(nic_dev); i++)
		unregister_netdev(nic_dev[i]);

	for (i = 0; i < ARRAY_SIZE(nic_dev); i++) {
		free_netdev(nic_dev[i]);
		nic_dev[i] = NULL;
	}
}
module_exit(brook_exit);
參考資料:
- Linux Device Drivers, Third Edition, Chapter 17: Network Drivers, https://lwn.net/Kernel/LDD3/
- Linux Networking and Network Devices APIs, https://www.kernel.org/doc/htmldocs/networking/index.html