關於NETLINK的介紹請看Netlink introduction,這裡假設您已經了解NETLINK,並且準備使用NETLINK_FIREWALL這個netlink family。使用這個family之前必須先載入ip_queue.ko這個module,或者您已經直接將它編進kernel當中。
我們由kernel的觀點來看NETLINK_FIREWALL提供哪些功能,首先看到net/ipv4/netfilter/ip_queue.c
static int __init ip_queue_init(void)
{
    ...
    // Register the NETLINK_FIREWALL handler, i.e. ipq_rcv_skb
    ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, ipq_rcv_skb, NULL, THIS_MODULE);
    ...
}

// Receive callback passed to netlink_kernel_create() above; it runs
// __ipq_rcv_skb() with ipqnl_mutex held.
static void ipq_rcv_skb(struct sk_buff *skb)
{
    mutex_lock(&ipqnl_mutex);
    __ipq_rcv_skb(skb);
    mutex_unlock(&ipqnl_mutex);
}

// (Excerpt) Hands the netlink payload to ipq_receive_peer(); the length
// passed is nlmsglen minus NLMSG_LENGTH(0). A negative status goes through
// RCV_SKB_FAIL, and netlink_ack() is called when NLM_F_ACK is set in flags.
static inline void __ipq_rcv_skb(struct sk_buff *skb)
{
    ...
    status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0));
    if (status < 0)
        RCV_SKB_FAIL(status);
    if (flags & NLM_F_ACK)
        netlink_ack(skb, nlh, 0);
}

// This is the function that provides the NETLINK_FIREWALL control
// operations: it covers setting what kind of packet data is copied to
// user space, and setting a packet's verdict (NF_DROP/NF_ACCEPT, etc.).
// Returns -EINVAL when len is smaller than sizeof(*pmsg), when the type is
// unknown, or when the verdict value exceeds NF_MAX_VERDICT.
static int ipq_receive_peer(struct ipq_peer_msg *pmsg, unsigned char type, unsigned int len)
{
    int status = 0;

    if (len < sizeof(*pmsg))
        return -EINVAL;

    switch (type) {
    case IPQM_MODE:
        // Select the copy-to-user-space mode: IPQ_COPY_META or IPQ_COPY_PACKET
        status = ipq_set_mode(pmsg->msg.mode.value, pmsg->msg.mode.range);
        break;
    case IPQM_VERDICT:
        // The verdict for a packet
        if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
            status = -EINVAL;
        else
            status = ipq_set_verdict(&pmsg->msg.verdict, len - sizeof(*pmsg));
        break;
    default:
        status = -EINVAL;
    }
    return status;
}
上述這段code就能大概了解NETLINK_FIREWALL在kernel的流程與提供的facility為何,透過IPQM_MODE設定copy to user-space的資料模式,當user-space收到資料後,判斷該資料是要DROP還是ACCEPT,決定後再透過IPQM_VERDICT告訴kernel,該封包是要DROP還是ACCEPT。
初步了解kernel提供的功能之後,下面就寫一個當接收到icmp echo封包,且seq為奇數的就DROP,其餘的就ACCEPT的範例。
#include <stdio.h> #include <stdint.h> #include <sys/types.h> #include <sys/socket.h> #include <errno.h> #include <string.h> #include <unistd.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> #include <netinet/in.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4/ip_queue.h> /** * 建立socket */ static int create_nl_socket(int proto) { int sock; struct sockaddr_nl addr; if ((sock = socket(AF_NETLINK, SOCK_RAW, proto)) < 0) { fprintf(stderr, "open sock failed.(%s)\n", strerror(errno)); return -1; } memset(&addr, 0, sizeof(addr)); addr.nl_family = AF_NETLINK; addr.nl_pid = getpid(); if (bind(sock, (struct sockaddr*)&addr, sizeof(addr)) < 0) { fprintf(stderr, "bind failed.(%s)\n", strerror(errno)); goto bind_err; } return sock; bind_err: close(sock); return -1; } /** * 設定IPQM_MODE */ static int ipq_set_mode(int sock, uint8_t mode, size_t range) { unsigned char buf[1024]; struct msghdr msg; struct sockaddr_nl dst = { .nl_family = AF_NETLINK }; struct nlmsghdr *nlh; struct ipq_peer_msg *pmsg; struct iovec iov = { .iov_base = (void *) buf, .iov_len = sizeof(buf) }; memset(buf, 0, sizeof(buf)); msg = (struct msghdr) { .msg_name = (void *)&dst, .msg_namelen = sizeof(dst), .msg_iov = &iov, .msg_iovlen = 1, }; nlh = (struct nlmsghdr*) buf; *nlh = (struct nlmsghdr) { .nlmsg_len = sizeof(buf), .nlmsg_flags = NLM_F_REQUEST, .nlmsg_type = IPQM_MODE, .nlmsg_pid = getpid(), }; pmsg = (struct ipq_peer_msg*) NLMSG_DATA(nlh); *pmsg = (struct ipq_peer_msg) { .msg.mode.value = mode, // IPQM_META或是IPQM_PACKET .msg.mode.range = range, // 封包的大小 }; printf("%s(#%d): nlmsglen:%d, NLMSG_LENGTH(0):%d\n", __func__, __LINE__, nlh->nlmsg_len, NLMSG_LENGTH(0)); return sendmsg(sock, &msg, 0); } /** * 列印封包內容 */ static void print_pkt(ipq_packet_msg_t *ipq_pkt) { int i; printf("packet_id:0x%lx, mark:0x%lx\n," "hook:%d, idev:%s, odev:%s\n," "hw_proto:%d, hw_type:%d, hw_addrlen:%d\n," "hw_addr:0x%02X%02X%02X%02X%02X%02X%02X%02X\n," "data_len:%ld, payload:\n", 
ipq_pkt->packet_id, ipq_pkt->mark, ipq_pkt->hook, ipq_pkt->indev_name, ipq_pkt->outdev_name, ipq_pkt->hw_protocol, ipq_pkt->hw_type, ipq_pkt->hw_addrlen, ipq_pkt->hw_addr[0], ipq_pkt->hw_addr[1], ipq_pkt->hw_addr[2], ipq_pkt->hw_addr[3], ipq_pkt->hw_addr[4], ipq_pkt->hw_addr[5], ipq_pkt->hw_addr[6], ipq_pkt->hw_addr[7], ipq_pkt->data_len); for (i = 0; i < ipq_pkt->data_len; i++) { printf("%02X ", ipq_pkt->payload[i]); if (!((i+1) % 16)) printf("\n"); } } /** * 根據封包內容給verdict */ static void get_verdict(ipq_packet_msg_t *ipq_pkt, int *verdict, unsigned long *id) { struct iphdr *iph; struct icmphdr *icmph; *id = ipq_pkt->packet_id; if (ipq_pkt->data_len < sizeof(struct iphdr)) { *verdict = NF_DROP; return; } iph = (struct iphdr *) ipq_pkt->payload; if (iph->protocol == IPPROTO_ICMP) { icmph = (struct icmphdr *) (ipq_pkt->payload + iph->ihl * 4); printf("Type: %d, Id:0x%04x, seq:0x%04x\n", icmph->type, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); // 序號為奇數就將之DROP if (ntohs(icmph->un.echo.sequence) % 2) { *verdict = NF_DROP; return; } } // 其餘就是ACCEPT *verdict = NF_ACCEPT; } /** * 設定封包的verdict */ static int set_verdict(int sock, int verdict, unsigned long id) { unsigned char buf[1024]; struct msghdr msg; struct sockaddr_nl dst = { .nl_family = AF_NETLINK }; struct nlmsghdr *nlh; struct ipq_peer_msg *pmsg; struct iovec iov = { .iov_base = (void *) buf, .iov_len = sizeof(buf) }; memset(buf, 0, sizeof(buf)); msg = (struct msghdr) { .msg_name = (void *)&dst, .msg_namelen = sizeof(dst), .msg_iov = &iov, .msg_iovlen = 1, }; nlh = (struct nlmsghdr*) buf; *nlh = (struct nlmsghdr) { .nlmsg_len = sizeof(buf), .nlmsg_flags = NLM_F_REQUEST, .nlmsg_type = IPQM_VERDICT, .nlmsg_pid = getpid(), }; pmsg = (struct ipq_peer_msg*) NLMSG_DATA(nlh); *pmsg = (struct ipq_peer_msg) { .msg.verdict.value = verdict, // NF_DROP或是NF_ACCEPT // packet_id詳細資料請看kernel的ipq_set_verdict() .msg.verdict.id = id, }; char *p = "NONE"; switch (verdict) { case NF_DROP: p = "DROP"; break; case 
NF_ACCEPT: p = "ACCEPT"; break; } printf("%s(#%d): %s packet %ld\n", __func__, __LINE__, p, id); return sendmsg(sock, &msg, 0); } /** * 處理接收到的封包 */ static int ipq_recv_pkt(int sock, size_t len) { unsigned char buf[NLMSG_SPACE(0) + len]; struct msghdr msg; struct sockaddr_nl dst = { .nl_family = AF_NETLINK }; struct nlmsghdr *nlh; struct iovec iov = { .iov_base = (void *) buf, .iov_len = len, }; memset(buf, 0, sizeof(buf)); msg = (struct msghdr) { .msg_name = (void *)&dst, .msg_namelen = sizeof(dst), .msg_iov = &iov, .msg_iovlen = 1, }; len = recvmsg(sock, &msg, 0); for (nlh = (struct nlmsghdr *) buf; NLMSG_OK (nlh, len); nlh = NLMSG_NEXT (nlh, len)) { /* The end of multipart message. */ if (nlh->nlmsg_type == NLMSG_DONE) { printf("NLMSG_DONE\n"); return 0; } /* Do some error handling. */ if (nlh->nlmsg_type == NLMSG_ERROR) { fprintf(stderr, "NLMSG_ERROR\n"); return -1; } if (nlh->nlmsg_type == IPQM_PACKET) { int verdict; unsigned long id; print_pkt(NLMSG_DATA(nlh)); get_verdict(NLMSG_DATA(nlh), &verdict, &id); set_verdict(sock, verdict, id); } } return 0; } int main(int argc, char *argv[]) { int sock, ret, cnt; sock = create_nl_socket(NETLINK_FIREWALL); if (sock < 0) { fprintf(stderr, "create_nl_socket failed\n"); return -1; } ret = ipq_set_mode(sock, IPQ_COPY_PACKET, 2048); if (ret < 0) { fprintf(stderr, "ipq_set_mode failed\n"); } else { printf("ipq_set_mode success\n"); } for (cnt = 0; cnt < 10; cnt++) { ret = ipq_recv_pkt(sock, 2048); } close(sock); return 0; }
透過iptables將經過OUTPUT chain的packet送到QUEUE去(例如:iptables -A OUTPUT -p icmp -j QUEUE),這樣kernel才會將packet丟到NETLINK_FIREWALL處理。
您可以看到kernel送出來的packet內容是從IP header開始。
您可以發現ping有一半的packet被DROP了。
Kernel version:2.6.37