用Netfilter模块实现基于令牌桶的每IP地址流量控制-阿里云开发者社区

在《修改netfilter的limit模块实现基于单个ip的流量监控》的最后，写了一个修订版的match回调函数，但是修订的版本还只是能监控单包或者两包的流量，粒度还是过于粗糙，因此使用传统的令牌桶的方式更好。在数据结构上，没有必要真的实现一个令牌桶，而是基于时间的流逝生成受控制数量的令牌即可-以时间的流逝来洗涤旧迹，也就是将两次发包或者收包的间隔和令牌数量联系起来。在Linux内核的标准流控实现(qdisc)以及鲁迅的散文中，这么做很常见。
以下是所有可编译运行的代码，整个流控代码分为两大部分。第一部分是内核模块，实现了netfilter的一个match；第二部分是用户态iptables扩展库，实现了一个match配置。
以下是内核模块，基于内核版本2.6.32-5-amd64，位于/usr/src/linux-source-2.6.32/net/netfilter/xt_limit.c：

/**  *     2011/11/06 by marywangran  *    这个版本实现只能使所有规则共享一个上限，作为全局管理局部特例  *    很方便，不多的几条iptables规则即可  */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <net/ip.h>  #include <linux/netfilter/x_tables.h> #include <linux/types.h>  MODULE_LICENSE("GPL"); MODULE_AUTHOR("marywangran <marywangran@126.com>"); MODULE_DESCRIPTION("Xtables: rate-limit match"); MODULE_ALIAS("ipt_limit"); MODULE_ALIAS("ip6t_limit");  struct xt_rateinfo {         __u32 type;    //定义类型，1为源地址限速，2为目标地址限速         __u32 burst;    //定义最大流量，统计值 };  struct src_controler {         struct list_head src_list;         int curr;             //当前的IP连接数         int max;            //最大的IP连接数         spinlock_t lock; };  struct src_entry {         struct list_head list;         int type;        //同xt_rateinfo结构体的type         __u32   addr;            unsigned long prev_hit; //上次包到来的时间         unsigned long toks;    //当前拥有的令牌数         spinlock_t lock; };  struct src_controler *src_ctl;  static bool limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) {         const struct xt_rateinfo *r = par->matchinfo;         unsigned long now = jiffies;          struct list_head *lh;         struct src_entry *entry = NULL;         struct src_entry *find_entry;         long tokens;         struct iphdr *iph = ip_hdr(skb);         __u32 this_addr = 0;          if (r->type == 1) {                 this_addr = iph->saddr;         } else {                 this_addr = iph->daddr;         }          spin_lock (&src_ctl->lock);  //操作链表一定加锁，多CPU下防止并发修改，访问         list_for_each(lh, &src_ctl->src_list) {                 find_entry = list_entry(lh, struct src_entry, list);                 if ((this_addr == find_entry->addr) &&                     (find_entry->type == r->type)) {                         entry = find_entry;                         break;                 }         }         spin_unlock (&src_ctl->lock);         if (entry) {              
   spin_lock (&src_ctl->lock);                 list_del(&entry->list);                 list_add(&entry->list, &src_ctl->src_list);                 spin_unlock (&src_ctl->lock);         } else {                    if (src_ctl->curr+1 < src_ctl->max) { add_entry:                         entry = kmalloc(sizeof(struct src_entry), GFP_ATOMIC); //必须使用ATOMIC标志，因为有可能在(软)中断中运行，不能睡眠。                         memset(entry, 0, sizeof(struct src_entry));                         entry->addr = this_addr;                         entry->toks = r->burst; //第一次分配令牌时不加倍(加倍理由为防止使用浮点运算)，以防TCP的慢启动增加突发流量，TCP的慢启动实际上很快，指数级的。                         entry->prev_hit = now;                         entry->type = r->type;                         spin_lock_init(&entry->lock);                         spin_lock (&src_ctl->lock);                         list_add(&entry->list, &src_ctl->src_list);                         src_ctl->curr++;     //应该使用atomic_inc进行递增                         spin_unlock (&src_ctl->lock);                 } else {                         entry = list_entry(src_ctl->src_list.prev, struct src_entry, list);                         if (now-entry->now > 1000) {                                 spin_lock (&src_ctl->lock);                                 list_del(&entry->list);                                 src_ctl->curr--;                                 spin_unlock (&src_ctl->lock);                                 vfree(entry);   //解锁后vfree                                 goto add_entry;                         }                         return 1;                 }         }     //以下根据流逝的时间来确定令牌的数量         tokens = min_t(long, (now-entry->prev_hit)*r->burst, r->burst*1000);         tokens += entry->toks;         if (tokens > (long)r->burst*1000)                         tokens = r->burst*1000;                 tokens -= skb->len*1000; //统一增加HZ倍，避免在内核使用浮点数和除法。          if (tokens >= 0) {                 spin_lock (&entry->lock);                 entry->prev_hit = now;                 
entry->toks = tokens;    //令牌积累                 spin_unlock (&entry->lock);                 return 0;         }         return 1; }  static bool limit_mt_check(const struct xt_mtchk_param *par) {         struct xt_rateinfo *r = par->matchinfo;         if (r->burst == 0 || r->type == 0) {                 return false;         }         if (r->type != 1 && r->type != 2)                 return false;         return true; }  static void limit_mt_destroy(const struct xt_mtdtor_param *par) {     //TODO }   static struct xt_match limit_mt_reg __read_mostly = {         .name             = "limit",         .revision         = 0,         .family           = NFPROTO_UNSPEC,         .match            = limit_mt,         .checkentry       = limit_mt_check,         .destroy          = limit_mt_destroy,         .matchsize        = sizeof(struct xt_rateinfo),         .me               = THIS_MODULE, };  static int __init limit_mt_init(void) {         src_ctl = kmalloc(sizeof(struct src_controler), GFP_KERNEL); //初始化全局变量，insmod上下文，可以使用KERNEL标志         memset(src_ctl, 0, sizeof(struct src_controler));         INIT_LIST_HEAD(&src_ctl->src_list);    //初始化全局变量的链表         src_ctl->curr = 0;         src_ctl->max = 2000;         spin_lock_init(&src_ctl->lock);         return xt_register_match(&limit_mt_reg); }  static void __exit limit_mt_exit(void) {         struct src_entry *entry = NULL;         struct list_head *lh = NULL, *lh2 = NULL;         xt_unregister_match(&limit_mt_reg);         spin_lock(&src_ctl->lock);         list_for_each_safe(lh, lh2, &src_ctl->src_list) { //一定要用safe宏，因为这是个外部迭代器                 entry = list_entry(lh, struct src_entry, list);                 list_del(&entry->list);                 kfree(entry);         }         spin_unlock(&src_ctl->lock);         kfree(src_ctl);  }  module_init(limit_mt_init); module_exit(limit_mt_exit);

为了方便编译和安装，以下是Makefile：

# Out-of-tree build of the xt_limit netfilter module against the running kernel.
#
# $(CURDIR) is GNU make's built-in "current working directory" variable; it is
# used directly instead of being overridden with a recursively expanded
# $(shell pwd), which would fork a shell on every expansion.
LINUXPATH = /lib/modules/`uname -r`/build
KBUILD_OUTPUT = $(CURDIR)
CROSS_COMPILE =
ARCH =

obj-m += xt_limit.o

# None of these targets produce a file of the same name.
.PHONY: all limit clean

all: limit

limit:
	$(MAKE) -C $(LINUXPATH) M=$(CURDIR) modules
	@echo "*********************************************"
	@echo "*  The MODULE is OK!!"
	@echo "*********************************************"

clean:
	rm -rf *.o *.ko *.mod.c *.symvers *.mod.o .*.cmd  ../common/*.o .tmp_versions

安装：
直接make后insmod或者将ko文件拷贝到/lib/modules/`uname -r`/kernel/net/netfilter/下面，然后modprobe xt_limit

****************************************************************************************************************************
以下是用户态iptables模块的代码，基于iptables版本1.4.12，位于iptables-1.4.12/extensions/libxt_limit.c：

/**  *    2011/11/06 by marywangran  *    修改自iptables-1.4.12/extensions/libxt_limit.c  */  #include <stdio.h> #include <string.h> #include <stdlib.h> #include <xtables.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_limit.h>  #define XT_LIMIT_BURST  500000  /**  *    新增轻量级rateinfo结构，对应于内核的等价结构  */ struct xt_rateinfo_new {         __u32 type;         __u32 burst; };  enum {         O_TYPE = 0,         O_BURST, };  static void limit_help(void) {         printf( "limit match options:\n" "--type source[1]|destination[2]        define source limit or destination limit\n" "--limit-burst rate                     rate to match in a burst, default %u\n", XT_LIMIT_BURST); }  static const struct xt_option_entry limit_opts[] = {         {.name = "type", .id = O_TYPE, .type = XTTYPE_STRING},         {.name = "limit-burst", .id = O_BURST, .type = XTTYPE_UINT32,          .flags = XTOPT_PUT, XTOPT_POINTER(struct xt_rateinfo_new, burst),          .min = 0, .max = 1073741824}, //1024*1024*1024         XTOPT_TABLEEND, };  static int parse_rate(const char *rate, uint32_t *val) {         uint32_t r;         r = atoi(rate);         if (!r)                 return 0;         if (r != 1 && r != 2) //1为限制源地址，2为限制目的地址                 return 0;         *val = r;         return 1; }  static void limit_init(struct xt_entry_match *m) {         struct xt_rateinfo_new *r = (struct xt_rateinfo_new *)m->data;         parse_rate("1", &r->type);         r->burst = XT_LIMIT_BURST; }  static void limit_parse(struct xt_option_call *cb) {         struct xt_rateinfo_new *r = cb->data;         xtables_option_parse(cb);         switch (cb->entry->id) {         case O_TYPE:                 if (!parse_rate(cb->arg, &r->type))                         xtables_error(PARAMETER_PROBLEM,                                    "bad rate \"%s\"'", cb->arg);                 break;         }         if (cb->invert)                 xtables_error(PARAMETER_PROBLEM,                            "limit does 
not support invert"); }  static void print_rate(uint32_t period) {         printf(" %u", period); }  static void limit_print(const void *ip, const struct xt_entry_match *match, int numeric) {         const struct xt_rateinfo_new *r = (const void *)match->data;         printf(" type: avg"); print_rate(r->type);         printf(" burst %u", r->burst); }  static void limit_save(const void *ip, const struct xt_entry_match *match) {         const struct xt_rateinfo_new *r = (const void *)match->data;          printf(" --type"); print_rate(r->type);         if (r->burst != XT_LIMIT_BURST)                 printf(" --limit-burst %u", r->burst); }  static struct xtables_match limit_match = {         .family         = NFPROTO_UNSPEC,         .name           = "limit",         .version        = XTABLES_VERSION,         .size           = XT_ALIGN(sizeof(struct xt_rateinfo_new)),         .help           = limit_help,         .init           = limit_init,         .x6_parse       = limit_parse,         .print          = limit_print,         .save           = limit_save,         .x6_options     = limit_opts, };  void _init(void) {         xtables_register_match(&limit_match); }

安装：

进入$IPTABLES_SOURCE/extensions目录，执行make install。
测试：
此时添加下列的规则：
iptables -A INPUT -m limit --type 1 --limit-burst 500000 -j DROP
iptables -A OUTPUT -m limit --type 2 --limit-burst 100000 -j DROP
即可对本机发出的和接收的数据流按IP地址进行限速。如果在本机往同一台机器scp多个文件，那么这多个数据传输流将共享所配置的限速带宽。如果只想限制某一类流量而不是全部流量，则在规则中添加其它的match进行筛选。

说明：以上的代码都是基于现有的Linux内核以及iptables源码修改的，框架虽在，然逻辑已全非，如果需保留原来的limit功能，请自行实现新的文件。

本文转自 dog250 51CTO博客，原文链接:http://blog.51cto.com/dog250/1270919