關於NETLINK的介紹請看
Netlink introduction。這裡假設您已經了解NETLINK,並且準備使用NETLINK_FIREWALL這個netlink family。使用這個family之前
必須先載入ip_queue.ko這個module,或者您已經直接將它編進kernel當中。
我們由kernel的觀點來看NETLINK_FIREWALL提供哪些功能,首先看到net/ipv4/netfilter/ip_queue.c
/*
 * Module init routine of ip_queue (excerpt; elided code is marked "...").
 * The part shown creates the kernel-side NETLINK_FIREWALL socket.
 */
static int __init ip_queue_init(void)
{
...
// Register the handler for NETLINK_FIREWALL, i.e. ipq_rcv_skb
ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
ipq_rcv_skb, NULL, THIS_MODULE);
...
}
/*
 * Handler passed to netlink_kernel_create() in ip_queue_init().
 * Runs __ipq_rcv_skb() on the skb while holding ipqnl_mutex.
 */
static void
ipq_rcv_skb(struct sk_buff *skb)
{
mutex_lock(&ipqnl_mutex);
__ipq_rcv_skb(skb);
mutex_unlock(&ipqnl_mutex);
}
/*
 * Excerpt (the elided header parsing is marked "..."): hands the message
 * payload, its type and the payload length (nlmsglen minus the netlink
 * header length) to ipq_receive_peer().
 */
static inline void
__ipq_rcv_skb(struct sk_buff *skb)
{
...
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
nlmsglen - NLMSG_LENGTH(0));
/* NOTE(review): RCV_SKB_FAIL is not shown here; presumably it reports
 * the error to the sender and returns - confirm against ip_queue.c. */
if (status < 0)
RCV_SKB_FAIL(status);
/* Acknowledge with error code 0 when the sender set NLM_F_ACK. */
if (flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
}
// This is the function that provides the NETLINK_FIREWALL control
// operations: setting what kind of packet data is copied to user space,
// and setting a packet's verdict (NF_DROP/NF_ACCEPT, etc.).
//
// Returns -EINVAL when len is smaller than sizeof(*pmsg), when the message
// type is unknown, or when the verdict value exceeds NF_MAX_VERDICT;
// otherwise returns the status of ipq_set_mode() / ipq_set_verdict().
static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
unsigned char type, unsigned int len)
{
int status = 0;
if (len < sizeof(*pmsg))
return -EINVAL;
switch (type) {
case IPQM_MODE:
// Select what is copied to user space: IPQ_COPY_META or IPQ_COPY_PACKET
status = ipq_set_mode(pmsg->msg.mode.value,
pmsg->msg.mode.range);
break;
case IPQM_VERDICT:
// Verdict for a packet; the bytes following the peer message
// (len - sizeof(*pmsg)) are passed along as the second argument
if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
status = -EINVAL;
else
status = ipq_set_verdict(&pmsg->msg.verdict,
len - sizeof(*pmsg));
break;
default:
status = -EINVAL;
}
return status;
}
上述這段code就能大概了解NETLINK_FIREWALL在kernel的流程與提供的facility為何,透過IPQM_MODE設定copy to user-space的資料模式,當user-space收到資料後,判斷該資料是要DROP還是ACCEPT,決定後再透過IPQM_VERDICT告訴kernel,該封包是要DROP還是ACCEPT。
初步了解kernel提供的功能之後,下面就寫一個當接收到icmp echo封包,且seq為奇數的就DROP,其餘的就ACCEPT的範例。
#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/in.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_queue.h>
/**
 * Open a raw netlink socket for the given protocol and bind it using this
 * process's PID as the netlink port id.
 *
 * @param proto  netlink protocol number handed to socket()
 * @return the bound socket descriptor, or -1 on failure (the reason is
 *         printed to stderr and the socket, if opened, is closed)
 */
static int create_nl_socket(int proto)
{
    struct sockaddr_nl local;
    int fd = socket(AF_NETLINK, SOCK_RAW, proto);

    if (fd < 0) {
        fprintf(stderr, "open sock failed.(%s)\n", strerror(errno));
        return -1;
    }

    memset(&local, 0, sizeof(local));
    local.nl_family = AF_NETLINK;
    local.nl_pid = getpid();

    if (bind(fd, (struct sockaddr*)&local, sizeof(local)) >= 0)
        return fd;

    /* Report the bind error before close() can overwrite errno. */
    fprintf(stderr, "bind failed.(%s)\n", strerror(errno));
    close(fd);
    return -1;
}
/**
* 設定IPQM_MODE
*/
static int ipq_set_mode(int sock, uint8_t mode, size_t range)
{
unsigned char buf[1024];
struct msghdr msg;
struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
struct nlmsghdr *nlh;
struct ipq_peer_msg *pmsg;
struct iovec iov = {
.iov_base = (void *) buf,
.iov_len = sizeof(buf)
};
memset(buf, 0, sizeof(buf));
msg = (struct msghdr) {
.msg_name = (void *)&dst,
.msg_namelen = sizeof(dst),
.msg_iov = &iov,
.msg_iovlen = 1,
};
nlh = (struct nlmsghdr*) buf;
*nlh = (struct nlmsghdr) {
.nlmsg_len = sizeof(buf),
.nlmsg_flags = NLM_F_REQUEST,
.nlmsg_type = IPQM_MODE,
.nlmsg_pid = getpid(),
};
pmsg = (struct ipq_peer_msg*) NLMSG_DATA(nlh);
*pmsg = (struct ipq_peer_msg) {
.msg.mode.value = mode, // IPQM_META或是IPQM_PACKET
.msg.mode.range = range, // 封包的大小
};
printf("%s(#%d): nlmsglen:%d, NLMSG_LENGTH(0):%d\n",
__func__, __LINE__, nlh->nlmsg_len, NLMSG_LENGTH(0));
return sendmsg(sock, &msg, 0);
}
/**
 * Print the metadata and a hex dump of the payload of a queued packet.
 *
 * @param ipq_pkt  packet message to print; data_len bytes of payload are
 *                 dumped, 16 per line
 */
static void print_pkt(ipq_packet_msg_t *ipq_pkt)
{
    size_t i;

    /* hw_protocol is converted with ntohs() before printing;
     * data_len is a size_t, hence %zu. */
    printf("packet_id:0x%lx, mark:0x%lx\n"
           "hook:%u, idev:%s, odev:%s\n"
           "hw_proto:%d, hw_type:%d, hw_addrlen:%d\n"
           "hw_addr:0x%02X%02X%02X%02X%02X%02X%02X%02X\n"
           "data_len:%zu, payload:\n",
           ipq_pkt->packet_id, ipq_pkt->mark,
           ipq_pkt->hook, ipq_pkt->indev_name, ipq_pkt->outdev_name,
           ntohs(ipq_pkt->hw_protocol), ipq_pkt->hw_type,
           ipq_pkt->hw_addrlen,
           ipq_pkt->hw_addr[0], ipq_pkt->hw_addr[1],
           ipq_pkt->hw_addr[2], ipq_pkt->hw_addr[3],
           ipq_pkt->hw_addr[4], ipq_pkt->hw_addr[5],
           ipq_pkt->hw_addr[6], ipq_pkt->hw_addr[7],
           ipq_pkt->data_len);

    for (i = 0; i < ipq_pkt->data_len; i++) {
        printf("%02X ", ipq_pkt->payload[i]);
        if ((i + 1) % 16 == 0)
            printf("\n");
    }
    /* Terminate a partially filled last line so later output starts clean. */
    if (ipq_pkt->data_len % 16 != 0)
        printf("\n");
}
/**
 * Decide the verdict for a queued packet from its contents.
 *
 * ICMP packets whose echo sequence number is odd are dropped; every other
 * well-formed packet is accepted. Packets too short to hold the headers
 * being inspected are dropped without reading past data_len.
 *
 * @param ipq_pkt  packet message received from the kernel
 * @param verdict  out: NF_DROP or NF_ACCEPT
 * @param id       out: the packet_id of ipq_pkt, to be echoed back with
 *                 the verdict
 */
static void
get_verdict(ipq_packet_msg_t *ipq_pkt, int *verdict, unsigned long *id)
{
    struct iphdr *iph;
    struct icmphdr *icmph;
    size_t ip_hdr_len;

    *id = ipq_pkt->packet_id;

    if (ipq_pkt->data_len < sizeof(struct iphdr)) {
        *verdict = NF_DROP;
        return;
    }

    iph = (struct iphdr *) ipq_pkt->payload;

    /* ihl counts 32-bit words; it must cover at least the fixed header
     * and must not point beyond the bytes we were actually given. */
    ip_hdr_len = (size_t) iph->ihl * 4;
    if (ip_hdr_len < sizeof(struct iphdr) || ip_hdr_len > ipq_pkt->data_len) {
        *verdict = NF_DROP;
        return;
    }

    if (iph->protocol == IPPROTO_ICMP) {
        /* The ICMP header must lie completely inside the payload. */
        if (ipq_pkt->data_len - ip_hdr_len < sizeof(struct icmphdr)) {
            *verdict = NF_DROP;
            return;
        }

        icmph = (struct icmphdr *) (ipq_pkt->payload + ip_hdr_len);
        printf("Type: %d, Id:0x%04x, seq:0x%04x\n",
               icmph->type, ntohs(icmph->un.echo.id),
               ntohs(icmph->un.echo.sequence));

        /* Drop the packet when the sequence number is odd. */
        if (ntohs(icmph->un.echo.sequence) % 2) {
            *verdict = NF_DROP;
            return;
        }
    }

    /* Everything else is accepted. */
    *verdict = NF_ACCEPT;
}
/**
* 設定封包的verdict
*/
static int set_verdict(int sock, int verdict, unsigned long id)
{
unsigned char buf[1024];
struct msghdr msg;
struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
struct nlmsghdr *nlh;
struct ipq_peer_msg *pmsg;
struct iovec iov = { .iov_base = (void *) buf, .iov_len = sizeof(buf) };
memset(buf, 0, sizeof(buf));
msg = (struct msghdr) {
.msg_name = (void *)&dst,
.msg_namelen = sizeof(dst),
.msg_iov = &iov,
.msg_iovlen = 1,
};
nlh = (struct nlmsghdr*) buf;
*nlh = (struct nlmsghdr) {
.nlmsg_len = sizeof(buf),
.nlmsg_flags = NLM_F_REQUEST,
.nlmsg_type = IPQM_VERDICT,
.nlmsg_pid = getpid(),
};
pmsg = (struct ipq_peer_msg*) NLMSG_DATA(nlh);
*pmsg = (struct ipq_peer_msg) {
.msg.verdict.value = verdict, // NF_DROP或是NF_ACCEPT
// packet_id詳細資料請看kernel的ipq_set_verdict()
.msg.verdict.id = id,
};
char *p = "NONE";
switch (verdict) {
case NF_DROP:
p = "DROP";
break;
case NF_ACCEPT:
p = "ACCEPT";
break;
}
printf("%s(#%d): %s packet %ld\n", __func__, __LINE__, p, id);
return sendmsg(sock, &msg, 0);
}
/**
* 處理接收到的封包
*/
static int ipq_recv_pkt(int sock, size_t len)
{
unsigned char buf[NLMSG_SPACE(0) + len];
struct msghdr msg;
struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
struct nlmsghdr *nlh;
struct iovec iov = {
.iov_base = (void *) buf,
.iov_len = len,
};
memset(buf, 0, sizeof(buf));
msg = (struct msghdr) {
.msg_name = (void *)&dst,
.msg_namelen = sizeof(dst),
.msg_iov = &iov,
.msg_iovlen = 1,
};
len = recvmsg(sock, &msg, 0);
for (nlh = (struct nlmsghdr *) buf; NLMSG_OK (nlh, len);
nlh = NLMSG_NEXT (nlh, len)) {
/* The end of multipart message. */
if (nlh->nlmsg_type == NLMSG_DONE) {
printf("NLMSG_DONE\n");
return 0;
}
/* Do some error handling. */
if (nlh->nlmsg_type == NLMSG_ERROR) {
fprintf(stderr, "NLMSG_ERROR\n");
return -1;
}
if (nlh->nlmsg_type == IPQM_PACKET) {
int verdict;
unsigned long id;
print_pkt(NLMSG_DATA(nlh));
get_verdict(NLMSG_DATA(nlh), &verdict, &id);
set_verdict(sock, verdict, id);
}
}
return 0;
}
/**
 * Demo entry point: open a NETLINK_FIREWALL socket, ask the kernel to copy
 * whole packets (up to PKT_RANGE bytes) to user space, then process
 * PKT_ROUNDS received datagrams.
 *
 * @return 0 on success, -1 when the socket cannot be created, the copy
 *         mode cannot be set, or receiving a packet fails
 */
int main(int argc, char *argv[])
{
    enum { PKT_RANGE = 2048, PKT_ROUNDS = 10 };
    int sock, ret, cnt;
    int status = 0;

    (void) argc;
    (void) argv;

    sock = create_nl_socket(NETLINK_FIREWALL);
    if (sock < 0) {
        fprintf(stderr, "create_nl_socket failed\n");
        return -1;
    }

    ret = ipq_set_mode(sock, IPQ_COPY_PACKET, PKT_RANGE);
    if (ret < 0) {
        /* The mode request did not go out, so do not wait for packets. */
        fprintf(stderr, "ipq_set_mode failed\n");
        close(sock);
        return -1;
    }
    printf("ipq_set_mode success\n");

    for (cnt = 0; cnt < PKT_ROUNDS; cnt++) {
        ret = ipq_recv_pkt(sock, PKT_RANGE);
        if (ret < 0) {
            fprintf(stderr, "ipq_recv_pkt failed\n");
            status = -1;
            break;
        }
    }

    close(sock);
    return status;
}
透過iptables將經過OUTPUT chain的packet送到QUEUE target(例如:iptables -A OUTPUT -p icmp -j QUEUE),這樣kernel才會將packet丟到NETLINK_FIREWALL處理。
您可以看到kernel送出來的packet內容是從IP header開始。
您可以發現ping有一半的packet被DROP了。
Kernel version:2.6.37