延續Linux Modules(12)- netfilter我們由nf_register_hooks()來看看netfilter這個framework是如何運作的。
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; int nf_register_hook(struct nf_hook_ops *reg) { struct nf_hook_ops *elem; int err; err = mutex_lock_interruptible(&nf_hook_mutex); if (err < 0) return err; list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { if (reg->priority < elem->priority) break; } list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); return 0; } void nf_unregister_hook(struct nf_hook_ops *reg) { mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); synchronize_net(); }nf_hook_register_hook()其實就是在將要註冊的callback function依照所屬的protocol family以及hooknum插入struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS],並且會依照priority由小到大,而nf_unregister_hook()就是很簡單的reg由nf_hooks中移除。
接著我們再來看看nf_iterate(),程式碼中以//為註解方式,且為粗體字型就是我的註解。
unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, int (*okfn)(struct sk_buff *), int hook_thresh) { unsigned int verdict; /* * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ list_for_each_continue_rcu(*i, head) { struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; // 註冊的priority必須小於等於hook_thresh才會被執行 if (hook_thresh > elem->priority) continue; /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ //丟進註冊的hook function執行 verdict = elem->hook(hook, skb, indev, outdev, okfn); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", elem->hook, hook); continue; } #endif //如果不是NF_ACCEPT而且也不是NF_REPEAT就回傳verdict // (NF_DROP/NF_STOLEN/NF_QUEUE) if (verdict != NF_REPEAT) return verdict; //會執行到這邊就是NF_REPEAT啦 *i = (*i)->prev; } // 如果verdict是NF_ACCEPT就會繼續往下一個hook function執行 } //如果沒有任何的hook function或者所有的hook function都是NF_ACCEPT return NF_ACCEPT; }
/* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int hook_thresh) { struct list_head *elem; unsigned int verdict; int ret = 0; /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); elem = &nf_hooks[pf][hook]; next_hook: // 將nf_hooks[pf][hook]這個linked list丟進nf_iterate()中執行 verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { // 如果是NF_ACCEPT或NF_STOP就回傳1, 到時候NF_HOOK()/NF_HOOK_COND() // 等macro就會執行okfn, 前面的註解也有說明 ret = 1; } else if (verdict == NF_DROP) { // 如果是NF_DROP就會free resource並且回傳!1, 就是不會呼叫okfn()了 kfree_skb(skb); ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { // 如果是QUEUE就會將他nf_queue()將資訊暫時存起來, 等候處理 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } rcu_read_unlock(); // 執行到這邊有可能是NF_STOLEN, 但ret = 0, 所以不會執行okfn, // NF_STOLEN會改變packet原本要走的路徑 return ret; }
#iddef CONFIG_NETFILTER int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); /** * nf_hook_thresh - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh) { #ifndef CONFIG_NETFILTER_DEBUG if (list_empty(&nf_hooks[pf][hook])) return 1; #endif return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). Returns -ERRNO if packet dropped. Zero means queued, stolen or accepted. */ /* RR: > I don't want nf_hook to return anything because people might forget > about async and trust the return value to mean "packet was ok". AK: Just document it clearly, then you can expect some sense from kernel coders :) */ static inline int NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sk_buff *), int thresh) { int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh); if (ret == 1) ret = okfn(skb); return ret; } static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sk_buff *), bool cond) { int ret; if (!cond || (ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN) == 1)) ret = okfn(skb); return ret; } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sk_buff *)) { return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN); } #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh) { return okfn(skb); } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { return 1; } #endif /*CONFIG_NETFILTER*/如果沒有defined CONFIG_NETFILTER,NF_HOOK()其實就是直接呼叫okfn了。到這邊對於netfilter的運作就有基本的認識了,有機會hack其他關於netfilter的心得再和大家分享。
Kernel version:2.6.36