2009年12月16日 星期三

Linux Kernel(5)- ioctl


(V)將介紹file operations中的ioctl。ioctl的prototype為:
int (*ioctl) (struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
ioctl藉由cmd來判斷後面所接的參數為何,而早期的ioctl號碼並沒有規則,所以很容易重複,後來為了避免重複,採行編碼方式,將cmd拆成幾個部份,包含:
type
  即magic number,可以根據Documentation/ioctl/ioctl-number.txt挑選一個。
number
  為sequential number或者稱為ordinal number,讓user自行定義,只要自己不重複即可。
direction
  傳輸的方向,不外乎NONE/READ/WRITE等等。
size
  即參數的size。

因為ioctl藉由cmd來判斷user想要的指令為何,以及後面所帶的參數為何,所以免不了的就會有一個switch/case來判斷,這也算是ioctl的特色吧。
怎麼定義ioctl的command以及如何解譯ioctl的command,我想直接拿ioctl.h來說明。
/*
 * Build an ioctl command number: each of the four fields (direction,
 * type/magic number, ordinal number, argument size) is shifted to its own
 * bit position and the results are OR-ed together. The _IOC_*SHIFT
 * constants are defined elsewhere in ioctl.h.
 */
#define _IOC(dir,type,nr,size) \
         (((dir)  << _IOC_DIRSHIFT) | \
         ((type) << _IOC_TYPESHIFT) | \
         ((nr)   << _IOC_NRSHIFT) | \
         ((size) << _IOC_SIZESHIFT))

/*
 * used to create numbers
 *
 * _IO    - command without an argument: direction _IOC_NONE, size 0.
 * _IOR   - direction _IOC_READ.
 * _IOW   - direction _IOC_WRITE.
 * _IOWR  - direction _IOC_READ|_IOC_WRITE.
 *
 * For _IOR/_IOW/_IOWR the size field is _IOC_TYPECHECK(size); that macro is
 * defined elsewhere (presumably it validates the argument and yields its
 * sizeof -- confirm against the full header).
 * The *_BAD variants encode plain sizeof(size) and skip _IOC_TYPECHECK.
 */
#define _IO(type,nr)            _IOC(_IOC_NONE,(type),(nr),0)
#define _IOR(type,nr,size)      _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size)))
#define _IOW(type,nr,size)      _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
#define _IOWR(type,nr,size)     _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size)))
#define _IOR_BAD(type,nr,size)  _IOC(_IOC_READ,(type),(nr),sizeof(size))
#define _IOW_BAD(type,nr,size)  _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))

/*
 * used to decode ioctl numbers..
 *
 * Each macro is the inverse of one field of _IOC(): shift the command number
 * right by the field's offset, then mask to the field's width. The argument
 * named "nr" here is the complete command number, not just the ordinal.
 */
#define _IOC_DIR(nr)            (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
#define _IOC_TYPE(nr)           (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
#define _IOC_NR(nr)             (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
#define _IOC_SIZE(nr)           (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
在定義ioctl的command時,我們會根據資料傳輸的方向使用_IO(不需要傳輸資料)/_IOR(讀取)/_IOW(寫入)/_IOWR(讀寫),type是我們挑選的magic number,nr即number,是流水號,size,就是the size of argument,下面是我們的範例ioc_brook.h:

#ifndef IOC_BROOK_H
#define IOC_BROOK_H

/*
 * ioctl command numbers for the "brook" character device.
 *
 * <linux/ioctl.h> supplies _IOR/_IOW/_IOWR for both kernel and user-space
 * builds, so this header no longer relies on the includer having pulled in
 * an ioctl header first.
 */
#include <linux/ioctl.h>

/* Magic number (the "type" field) shared by every brook command. */
#define BROOK_IOC_MAGIC     'k'

/* Set the stored number: the caller passes a pointer to the new int. */
#define BROOK_IOCSETNUM     _IOW(BROOK_IOC_MAGIC,  1, int)
/* Get the stored number: it is written to the int the caller points at. */
#define BROOK_IOCGETNUM     _IOR(BROOK_IOC_MAGIC,  2, int)
/* Exchange: store the caller's int and hand back the previous value. */
#define BROOK_IOCXNUM       _IOWR(BROOK_IOC_MAGIC, 3, int)

/* Highest ordinal number in use; commands above it are rejected. */
#define BROOK_IOC_MAXNR     3

#endif
這邊定義三個ioctl的command,分別為設定數值(BROOK_IOCSETNUM),取得數值(BROOK_IOCGETNUM)和交換數值(BROOK_IOCXNUM)。

以下是我的module:
#include <linux/init.h>
#include <linux/module.h>

#include <linux/fs.h> // chrdev
#include <linux/cdev.h> // cdev_add()/cdev_del()
#include <linux/semaphore.h> // up()/down_interruptible()
#include <asm/uaccess.h> // copy_*_user()

#include "ioc_brook.h"

MODULE_LICENSE("GPL");

#define DEV_BUFSIZE         1024


/* Device number of the "brook" chrdev; filled in by init_modules(). */
static int dev_major = 0;
static int dev_minor = 0;

/* cdev object for the device; stays NULL until init_modules() allocates it. */
struct cdev *dev_cdevp;

/* Forward declarations of the file operations referenced by dev_fops. */
static int dev_open(struct inode *inode, struct file *filp);
static int dev_release(struct inode *inode, struct file *filp);
static int dev_ioctl(struct inode *inode, struct file *filp,
                     unsigned int cmd, unsigned long args);

/* Module unload handler, defined after init_modules(). */
static void __exit exit_modules(void);

/*
 * File operations of the brook device: only open, release and ioctl are
 * implemented.
 * NOTE(review): the locked .ioctl member was removed from struct
 * file_operations in kernel 2.6.36; newer kernels need .unlocked_ioctl,
 * which has a different handler signature.
 */
struct file_operations dev_fops = {
    .owner = THIS_MODULE,
    .open = dev_open,
    .release = dev_release,
    .ioctl = dev_ioctl,
};

/*
 * open() handler: nothing to set up, it only logs its own name so the call
 * can be seen in the kernel log. Always succeeds.
 */
static int dev_open(struct inode *inode, struct file *filp)
{
    const char *self = __func__;

    printk("%s():\n", self);

    return 0;
}

/*
 * release() handler: no per-open state to tear down, it only logs its own
 * name. Always succeeds.
 */
static int dev_release(struct inode *inode, struct file *filp)
{
    const char *self = __func__;

    printk("%s():\n", self);

    return 0;
}

static int brook_num = 0;
static int 
dev_ioctl(struct inode *inode, struct file *filp,
          unsigned int cmd, unsigned long args)
{
    int tmp, err = 0, ret = 0;

    if (_IOC_TYPE(cmd) != BROOK_IOC_MAGIC)
        return -ENOTTY;
    if (_IOC_NR(cmd) > BROOK_IOC_MAXNR)
        return -ENOTTY;

    if (_IOC_DIR(cmd) & _IOC_READ) {
        err = !access_ok(VERIFY_WRITE, (void __user*)args, _IOC_SIZE(cmd));
    } else if (_IOC_DIR(cmd) & _IOC_WRITE) {
        err = !access_ok(VERIFY_READ, (void __user *)args, _IOC_SIZE(cmd));
    }
    if (err)
        return -EFAULT;

    switch (cmd) {
        case BROOK_IOCSETNUM:
            // don't need call access_ok() again. using __get_user().
            ret = __get_user(brook_num, (int __user *)args); 
            printk("%s(): get val = %d\n", __FUNCTION__, brook_num);
            break;
        case BROOK_IOCGETNUM:
            ret = __put_user(brook_num, (int __user *)args);
            printk("%s(): set val to %d\n", __FUNCTION__, brook_num);
            break;
        case BROOK_IOCXNUM:
            tmp = brook_num;
            ret = __get_user(brook_num, (int __user *)args);
            if (!ret) {
                ret = __put_user(tmp, (int __user *)args);
            }
            printk("%s(): change val from %d to %d\n",
                       __FUNCTION__, tmp, brook_num);
            break;
        default: /* redundant, as cmd was checked against MAXNR */
            return -ENOTTY;
    }
    return 0;
}

/*
 * init_modules - module load handler.
 *
 * Allocates a dynamic char device number named "brook", then allocates,
 * initialises and registers the cdev bound to dev_fops.
 *
 * Returns 0 on success or a negative errno. On failure everything acquired
 * so far is released; the earlier version returned 0 from its failure path
 * and left the device number region allocated.
 */
static int __init init_modules(void)
{
    dev_t dev;
    int ret;

    ret = alloc_chrdev_region(&dev, 0, 1, "brook");
    if (ret < 0) {
        printk("can't alloc chrdev\n");
        return ret;
    }
    dev_major = MAJOR(dev);
    dev_minor = MINOR(dev);
    printk("register chrdev(%d,%d)\n", dev_major, dev_minor);

    dev_cdevp = kmalloc(sizeof(*dev_cdevp), GFP_KERNEL);
    if (dev_cdevp == NULL) {
        printk("kmalloc failed\n");
        ret = -ENOMEM;
        goto err_unregister;
    }
    cdev_init(dev_cdevp, &dev_fops);
    dev_cdevp->owner = THIS_MODULE;
    ret = cdev_add(dev_cdevp, MKDEV(dev_major, dev_minor), 1);
    if (ret < 0) {
        printk("add chr dev failed\n");
        goto err_free;
    }

    return 0;

err_free:
    kfree(dev_cdevp);
    dev_cdevp = NULL; /* exit_modules() must not see a stale pointer */
err_unregister:
    unregister_chrdev_region(dev, 1);
    return ret;
}

/*
 * exit_modules - module unload handler.
 *
 * Removes and frees the cdev if one was set up, then gives the device
 * number back.
 */
static void __exit exit_modules(void)
{
    dev_t devno = MKDEV(dev_major, dev_minor);

    if (dev_cdevp != NULL) {
        cdev_del(dev_cdevp);
        kfree(dev_cdevp);
    }

    unregister_chrdev_region(devno, 1);
    printk("unregister chrdev\n");
}

/* Register init_modules()/exit_modules() as the module's load/unload hooks. */
module_init(init_modules);
module_exit(exit_modules);

在dev_ioctl()先檢視command的type(magic number)和number(sequential number)是否正確,接著再根據command的read/write特性,使用access_ok()檢驗該位址是否合法,後面就是ioctl慣有的switch/case了,根據不同的case執行不同的command和解釋後面所攜帶的參數。

底下是我的application:
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/types.h>

#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#include "ioc_brook.h"

/*
 * Test program for the brook device.
 *
 * argv[1] is the path of the device node. The program sets the number to
 * 10, reads it back, then exchanges it with 100 and prints what the driver
 * returned. Returns 0 on success and -1 on any failure; the descriptor is
 * closed on every path once it has been opened.
 */
int main(int argc, char *argv[])
{
    int fd, ret;
    int status = -1;

    if (argc < 2) {
        printf("Usage: prog <device>\n");
        return -1;
    }

    fd = open(argv[1], O_RDWR);
    if (fd < 0) {
        printf("open %s failed\n", argv[1]);
        return -1;
    }

    ret = 10;
    if (ioctl(fd, BROOK_IOCSETNUM, &ret) < 0) {
        printf("set num failed\n");
        goto out;
    }

    if (ioctl(fd, BROOK_IOCGETNUM, &ret) < 0) {
        printf("get num failed\n");
        goto out;
    }
    printf("get value = %d\n", ret);

    ret = 100;
    if (ioctl(fd, BROOK_IOCXNUM, &ret) < 0) {
        printf("exchange num failed\n");
        goto out;
    }
    /* After the exchange, ret holds the driver's previous value. */
    printf("get value = %d\n", ret);

    status = 0;

out:
    close(fd);
    return status;
}

在app.c中,將開啟上面註冊的device,並且設定數值(BROOK_IOCSETNUM),讀取數值(BROOK_IOCGETNUM),和交換數值(BROOK_IOCXNUM)。



2009年12月15日 星期二

如何利用kvm/qemu練習linux module


"如何利用kvm/qemu練習linux module"將介紹如何編譯一個bzImage在kvm上面執行,我們將掛上一個initramfs當我們的root filesystem,除了沒有實體的裝置以外,其實可以看成一個embedded linux了。我也是利用這種方式撰寫module的文章,因為module一寫不好,kernel就會crash了,透過vm,就不怕kernel crash了。
編譯kernel
brook@ubuntu:~$ mkdir linux
brook@ubuntu:~$ cd linux/
brook@ubuntu:~/linux$ apt-get source linux-image-2.6.31-16-generic
Reading package lists... Done
Building dependency tree       
Reading state information... Done
NOTICE: 'linux' packaging is maintained in the 'Git' version control system at:
http://kernel.ubuntu.com/git-repos/ubuntu/ubuntu-karmic.git
Need to get 81.0MB of source archives.
Get:1 http://tw.archive.ubuntu.com karmic-updates/main linux 2.6.31-16.53 (dsc) [3,781B]
0% [Waiting for headers]    
Fetched 81.0MB in 3min 29s (387kB/s)
gpgv: Signature made Tue 08 Dec 2009 11:50:10 AM CST using DSA key ID 17063E6D
gpgv: Can't check signature: public key not found
dpkg-source: warning: failed to verify signature on ./linux_2.6.31-16.53.dsc
dpkg-source: info: extracting linux in linux-2.6.31
dpkg-source: info: unpacking linux_2.6.31.orig.tar.gz
dpkg-source: info: applying linux_2.6.31-16.53.diff.gz
brook@ubuntu:~/linux$ cd linux-2.6.31
brook@ubuntu:~/linux/linux-2.6.31$ cp /boot/config-2.6.31-16-generic .config
brook@ubuntu:~/linux/linux-2.6.31$ make oldconfig
brook@ubuntu:~/linux/linux-2.6.31$ make menuconfig

接下來要設定我們的initramfs的目錄,先make menuconfig,接著選"General setup" -> "Initramfs source file(s)",接著輸入目錄"/home/brook/linux/rootfs",注意該選項的上一個選項"Initial RAM filesystem and RAM disk (initramfs/initrd) support"有被enabled。如果要使用的是initrd則"Initramfs source file(s)"就留空白,使用initrd的好處是,initrd有任何改變都不需要重新compile kernel。

rootfs的基本設定
brook@ubuntu:~/linux$ mkdir rootfs
brook@ubuntu:~/linux$ cd rootfs
brook@ubuntu:~/linux/rootfs$ mkdir dev
brook@ubuntu:~/linux/rootfs$ mkdir tmp
brook@ubuntu:~/linux/rootfs$ mkdir bin
brook@ubuntu:~/linux/rootfs$ mkdir sbin
brook@ubuntu:~/linux/rootfs$ mkdir etc
brook@ubuntu:~/linux/rootfs$ mkdir lib
brook@ubuntu:~/linux/rootfs$ mkdir proc
brook@ubuntu:~/linux/rootfs$ mkdir sys
brook@ubuntu:~/linux/rootfs$ mkdir usr
brook@ubuntu:~/linux/rootfs$ apt-get install busybox-static
brook@ubuntu:~/linux/rootfs$ cp /bin/busybox bin
brook@ubuntu:~/linux/rootfs$ cd bin
brook@ubuntu:~/linux/rootfs/bin$ ln -s busybox -s mkdir
brook@ubuntu:~/linux/rootfs/bin$ ln -s busybox -s mknod
brook@ubuntu:~/linux/rootfs/bin$ ln -s busybox -s mount
brook@ubuntu:~/linux/rootfs/bin$ ln -s busybox -s rm
brook@ubuntu:~/linux/rootfs/bin$ ln -s busybox -s sh
brook@ubuntu:~/linux/rootfs/bin$ cd ../sbin
brook@ubuntu:~/linux/rootfs/sbin$ ln -s ../bin/busybox -s init
brook@ubuntu:~/linux/rootfs/sbin$ ln -s ../bin/busybox -s mdev
brook@ubuntu:~/linux/rootfs/$ find . |cpio -H newc -o > ../initrd

如果使用的是initrd則需要用cpio將這個目錄打包成archive(即上面最後一個指令),如果是initramfs就不用最後一個指令了。

接著當然還要寫一下init script,負責開機後的一些基本設定,其內容如下
#!/bin/sh
# Boot-time setup script for the busybox root filesystem: mounts the pseudo
# filesystems, creates the busybox applet links, populates /dev and finally
# replaces itself with /sbin/init.

#Mount things needed by this script
# /dev is a small (64k) tmpfs so device nodes can be created at run time.
mount -t tmpfs -o size=64k,mode=0755 tmpfs /dev
# The /dev/pts mount point has to be created after /dev itself is mounted.
mkdir /dev/pts
mount -t devpts devpts /dev/pts
/bin/mount -t proc proc /proc
/bin/mount -t sysfs sysfs /sys

#Create all the symlinks to /bin/busybox
/bin/busybox --install -s

#Create device nodes
# Write /sbin/mdev into the kernel's hotplug setting (needs /proc mounted),
# then run mdev once with -s (presumably scans /sys for devices that already
# exist -- see the busybox mdev documentation).
echo /sbin/mdev > /proc/sys/kernel/hotplug
/sbin/mdev -s

# Replace this shell with init.
exec /sbin/init
最後make bzImage即可。

接著執行kvm/qemu吧
brook@ubuntu:~/linux/linux-2.6.31$ kvm -no-acpi -kernel arch/x86_64/boot/bzImage -net nic,model=pcnet -net tap,ifname=tap0,script=no

如果是initrd則需要多一個參數給kvm/qemu。
brook@ubuntu:~/linux/linux-2.6.31$ kvm -no-acpi -kernel arch/x86_64/boot/bzImage -initrd /home/brook/initrd -net nic,model=pcnet -net tap,ifname=tap0,script=no



2009年12月13日 星期日

Linux Kernel(4.1)- seq_file之範例(fs/proc/devices.c)


(IV .1)是seq_file的實例說明,將Linux中的fs/proc/devices.c拿出來當範例並且予以說明。
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/*
 * seq_file show() callback: v points at the current position. Positions
 * below CHRDEV_MAJOR_HASH_SIZE are passed to chrdev_show(); the remaining
 * ones are rebased to zero and passed to blkdev_show() when CONFIG_BLOCK is
 * set. A section heading is printed before the first entry of each group.
 * Always returns 0.
 */
static int devinfo_show(struct seq_file *f, void *v)
{
    int idx = *(loff_t *) v;

    if (idx < CHRDEV_MAJOR_HASH_SIZE) {
        if (!idx)
            seq_printf(f, "Character devices:\n");
        chrdev_show(f, idx);
        return 0;
    }

#ifdef CONFIG_BLOCK
    idx -= CHRDEV_MAJOR_HASH_SIZE;
    if (!idx)
        seq_printf(f, "\nBlock devices:\n");
    blkdev_show(f, idx);
#endif

    return 0;
}


/*
 * seq_file start() callback: the iterator is the position pointer itself.
 * Returns pos while *pos is below the combined block + char table size,
 * otherwise NULL to end the sequence.
 */
static void *devinfo_start(struct seq_file *f, loff_t *pos)
{
    const loff_t total = BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE;

    return (*pos < total) ? pos : NULL;
}

/*
 * seq_file next() callback: advance the position by one and return it, or
 * NULL once it reaches the combined block + char table size.
 */
static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos)
{
    const loff_t total = BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE;

    ++*pos;
    if (*pos < total)
        return pos;
    return NULL;
}

/* seq_file stop() callback: devinfo_start() acquires nothing, so this is a no-op. */
static void devinfo_stop(struct seq_file *f, void *v)
{
}

/* Iterator callbacks handed to seq_open() by devinfo_open(). */
static const struct seq_operations devinfo_ops = {
    .start = devinfo_start,
    .next = devinfo_next,
    .stop = devinfo_stop,
    .show = devinfo_show,
};

/* open() handler: attach the devinfo_ops iterator to the file via seq_open(). */
static int devinfo_open(struct inode *inode, struct file *filp)
{
    int rc = seq_open(filp, &devinfo_ops);

    return rc;
}

/*
 * File operations of the "devices" proc entry: open is ours, while read,
 * llseek and release are the seq_file helpers.
 */
static const struct file_operations proc_devinfo_operations = {
    .open = devinfo_open,
    .read = seq_read,
    .llseek = seq_lseek,
    .release = seq_release,
};

/*
 * Init hook: create the "devices" proc entry (mode 0, no parent directory
 * given) backed by proc_devinfo_operations. The result of proc_create() is
 * not checked and 0 is always returned. There is no matching exit hook.
 */
static int __init proc_devices_init(void)
{
    (void)proc_create("devices", 0, NULL, &proc_devinfo_operations);

    return 0;
}
module_init(proc_devices_init);
首先,這邊只有module_init(),所以只能載入,不能unload。而載入的點就是create一個proc檔 "devices",並且註冊其file operations "proc_devinfo_operations",在"proc_devinfo_operations"可以發現是一個seq_file的架構,所以我們就會想到start()/next()/stop()/show()等function應該負責的功能。
首先會看到start()即"devinfo_start()",可以看出pos代表的是第幾個device,而pos最大為block+char的總和。
"devinfo_next()"應該負責移動pos,所以可以看出只有做了(*pos)++。
由於"devinfo_start()"並沒有和系統要求任何的resource,所以"devinfo_stop()"就不需要有任何cleanup的動作。
而"devinfo_show()"則是各別呼叫"chrdev_show()"和"blkdev_show()"來顯示char device和block device。



熱門文章