2010年11月27日 星期六

Linux Kernel(12.1)- netfilter機制之初探


延續Linux Modules(12)- netfilter,我們由nf_register_hook()來看看netfilter這個framework是如何運作的。

/* Registered hooks: one list per (protocol family, hook number) pair.
 * nf_register_hook() inserts into these lists in ascending priority order. */
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;

/*
 * Register a hook: insert reg into nf_hooks[reg->pf][reg->hooknum],
 * keeping that list sorted by ascending priority.
 *
 * Returns 0 on success, or the negative value returned by
 * mutex_lock_interruptible() when nf_hook_mutex could not be taken.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
    struct nf_hook_ops *elem;
    int err;

    err = mutex_lock_interruptible(&nf_hook_mutex);
    if (err < 0)
        return err;
    /*
     * Stop at the first entry whose priority is strictly greater than
     * ours, so hooks with equal priority stay in registration order.
     * If the loop runs to the end, elem->list is the list head itself
     * and the insertion below lands at the tail.
     */
    list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
        if (reg->priority < elem->priority)
            break;
    }
    /* Insert right before elem, i.e. after elem's predecessor. */
    list_add_rcu(&reg->list, elem->list.prev);
    mutex_unlock(&nf_hook_mutex);
    return 0;
}

void nf_unregister_hook(struct nf_hook_ops *reg)
{
    mutex_lock(&nf_hook_mutex);
    list_del_rcu(®->list);
    mutex_unlock(&nf_hook_mutex);
    synchronize_net();
}
nf_register_hook()其實就是將要註冊的callback function依照所屬的protocol family以及hooknum插入struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS],並且會依照priority由小到大排列;而nf_unregister_hook()就是很簡單地將reg由nf_hooks中移除。

接著我們再來看看nf_iterate(),程式碼中以//為註解方式,且為粗體字型就是我的註解。
/*
 * Walk the hook list `head`, continuing from the position held in *i, and
 * call each hook whose priority is at least hook_thresh.
 *
 * Returns the first verdict that is neither NF_ACCEPT nor NF_REPEAT, or
 * NF_ACCEPT when the end of the list is reached. *i serves as the list
 * cursor and is updated as the walk proceeds.
 */
unsigned int
nf_iterate(struct list_head *head, struct sk_buff *skb,
          unsigned int hook, const struct net_device *indev,
          const struct net_device *outdev, struct list_head **i,
          int (*okfn)(struct sk_buff *), int hook_thresh)
{
    unsigned int verdict;

    /*
     * The caller must not block between calls to this
     * function because of risk of continuing from deleted element.
     */
    list_for_each_continue_rcu(*i, head) {
        // NOTE(review): this cast assumes `list` is the first member of
        // struct nf_hook_ops -- confirm against the struct definition.
        struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;

        // A hook runs only if its registered priority is >= hook_thresh;
        // entries with a lower priority are skipped.
        if (hook_thresh > elem->priority)
            continue;

        /* Optimization: we don't need to hold module
           reference here, since function can't sleep. --RR */
        // Call the registered hook function.
        verdict = elem->hook(hook, skb, indev, outdev, okfn);
        if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
            // Debug builds: log an out-of-range verdict and move on to
            // the next hook instead of returning it.
            if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) {
                NFDEBUG("Evil return from %p(%u).\n", elem->hook, hook);
                continue;
            }
#endif
            // Anything that is neither NF_ACCEPT nor NF_REPEAT is returned
            // to the caller (NF_DROP/NF_STOLEN/NF_QUEUE).
            if (verdict != NF_REPEAT)
                return verdict;
            // Only NF_REPEAT gets here: step the cursor back one entry so
            // the loop's next step lands on this same hook again.
            *i = (*i)->prev;
        }
        // On NF_ACCEPT, carry on with the next hook function.
    }
    // Either no hook function ran, or every one of them returned NF_ACCEPT.
    return NF_ACCEPT;
}

/*
 * Run the hooks registered for (pf, hook) against skb.
 *
 * Return value:
 *    1       okfn() needs to be executed by the caller
 *   -EPERM   a hook returned NF_DROP; skb has been freed here
 *    0       otherwise; the caller must not run okfn()
 */
int
nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
             struct net_device *indev, struct net_device *outdev,
             int (*okfn)(struct sk_buff *), int hook_thresh)
{
    struct list_head *cursor;
    unsigned int verdict;
    int ret = 0;

    /* We may already have this, but read-locks nest anyway */
    rcu_read_lock();

    cursor = &nf_hooks[pf][hook];
    for (;;) {
        // Hand the nf_hooks[pf][hook] list to nf_iterate(); cursor
        // remembers where the walk stopped.
        verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
                             outdev, &cursor, okfn, hook_thresh);

        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
            // Return 1 so that NF_HOOK()/NF_HOOK_COND() and friends
            // go on to call okfn.
            ret = 1;
            break;
        }

        if (verdict == NF_DROP) {
            // Free the packet and report an error; okfn() is not called.
            kfree_skb(skb);
            ret = -EPERM;
            break;
        }

        // Any remaining non-queue verdict (e.g. NF_STOLEN) leaves ret at 0,
        // so okfn is not run; NF_STOLEN diverts the packet from its path.
        if ((verdict & NF_VERDICT_MASK) != NF_QUEUE)
            break;

        // Queue verdict: hand the packet to nf_queue(). A zero return
        // means the walk resumes from cursor; non-zero ends it.
        if (nf_queue(skb, cursor, pf, hook, indev, outdev, okfn,
                     verdict >> NF_VERDICT_BITS))
            break;
    }

    rcu_read_unlock();
    return ret;
}


#iddef CONFIG_NETFILTER
int
nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
             struct net_device *indev, struct net_device *outdev,
             int (*okfn)(struct sk_buff *), int thresh);

/**
 * nf_hook_thresh - run the netfilter hooks for (pf, hook) with a threshold
 *
 * A return value of 1 means the hooks let the packet pass, and the caller
 * is then responsible for invoking okfn.  Any other return value means
 * the packet has been consumed by the hook.
 */
static inline int
nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
               struct net_device *indev, struct net_device *outdev,
               int (*okfn)(struct sk_buff *), int thresh)
{
#ifdef CONFIG_NETFILTER_DEBUG
    /* Debug builds always go through nf_hook_slow(). */
#else
    /* Fast path: nothing is registered for this (pf, hook). */
    if (list_empty(&nf_hooks[pf][hook]))
        return 1;
#endif
    return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}

/*
 * Run the hooks for (pf, hook) with the threshold set to INT_MIN, the
 * lowest value an int priority can be compared against.
 * Returns whatever nf_hook_thresh() returns; okfn is not called here.
 */
static inline int
nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
        struct net_device *indev, struct net_device *outdev,
        int (*okfn)(struct sk_buff *))
{
    const int lowest_thresh = INT_MIN;

    return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn,
                          lowest_thresh);
}
                   
/* Activate hook; either okfn or kfree_skb called, unless a hook
   returns NF_STOLEN (in which case, it's up to the hook to deal with
   the consequences).

   Returns -ERRNO if packet dropped.  Zero means queued, stolen or
   accepted.
*/

/* RR:
   > I don't want nf_hook to return anything because people might forget
   > about async and trust the return value to mean "packet was ok".

   AK:
   Just document it clearly, then you can expect some sense from kernel
   coders :)
*/

/*
 * Run the hooks for (pf, hook) with the given threshold and, when they
 * report 1, call okfn(skb) and return its result.  Any other hook
 * result is returned unchanged and okfn is not called.
 */
static inline int
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb,
               struct net_device *in, struct net_device *out,
               int (*okfn)(struct sk_buff *), int thresh)
{
    int rc = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh);

    if (rc != 1)
        return rc;
    return okfn(skb);
}

/*
 * Conditionally run the hooks for (pf, hook).
 *
 * When cond is false the hooks are skipped and okfn(skb) is called
 * directly.  When cond is true the hooks run with threshold INT_MIN;
 * okfn(skb) is called only if they return 1, otherwise the hook result
 * (e.g. -EPERM for a dropped packet) is returned unchanged.
 *
 * The hook result is stored before it is compared with 1.  Writing
 * `ret = nf_hook_thresh(...) == 1` would store the comparison result
 * instead, turning every non-1 result into 0.
 */
static inline int
NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb,
             struct net_device *in, struct net_device *out,
             int (*okfn)(struct sk_buff *), bool cond)
{
    int ret = 1;

    if (cond)
        ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN);
    if (ret == 1)
        ret = okfn(skb);
    return ret;
}

/*
 * Run the hooks for (pf, hook) and call okfn when they allow it.
 * This is NF_HOOK_THRESH() with the threshold fixed at INT_MIN.
 */
static inline int
NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb,
        struct net_device *in, struct net_device *out,
        int (*okfn)(struct sk_buff *))
{
    const int lowest_thresh = INT_MIN;

    return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, lowest_thresh);
}

#else /* !CONFIG_NETFILTER */

/* Without CONFIG_NETFILTER both macros expand to a direct call of okfn on
 * skb; the remaining arguments (including cond) do not appear in the
 * expansion and are therefore never evaluated. */
#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
/*
 * !CONFIG_NETFILTER stub: calls okfn(skb) directly and returns its result;
 * every other argument is ignored.
 * NOTE(review): the CONFIG_NETFILTER version returns 1 and leaves the okfn
 * call to the caller -- confirm that callers of this stub expect okfn to
 * have run already.
 */
static inline int
nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
               struct net_device *indev, struct net_device *outdev,
               int (*okfn)(struct sk_buff *), int thresh)
{
    int rc = okfn(skb);

    return rc;
}

/*
 * !CONFIG_NETFILTER stub: always returns 1 and touches none of its
 * arguments.  For the hook functions in this file, 1 means the caller
 * must invoke okfn itself.
 */
static inline int
nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
        struct net_device *indev, struct net_device *outdev,
        int (*okfn)(struct sk_buff *))
{
    const int pass = 1;

    return pass;
}
#endif /*CONFIG_NETFILTER*/
如果沒有defined CONFIG_NETFILTER,NF_HOOK()其實就是直接呼叫okfn了。到這邊對於netfilter的運作就有基本的認識了,有機會hack其他關於netfilter的心得再和大家分享。

Kernel version:2.6.36



沒有留言:

張貼留言

熱門文章