linux网桥---初始化

  1. 云栖社区>
  2. 博客>
  3. 正文

linux网桥---初始化

云之大者 2014-06-27 18:11:41 浏览400
展开阅读全文
   上一节只是说了关于网桥的接收处理,这里分析下网桥模块的初始化工作. 对于桥的用户空间配置工具一般来说有brctl和ebtables.
  参考内核 2.6.32.61   kernel/net/bridge/*
  我们来看一下br.c中br_init函数

点击(此处)折叠或打开

  1. static int __init br_init(void)
  2. {
  3.     int err;

  4.     err = stp_proto_register(&br_stp_proto);      //注册stp协议,把协议放到garp_protos里,在net/802/stp.c stp_pdu_rcv中有使用

  5. 点击(此处)折叠或打开

    1. static const struct stp_proto br_stp_proto = {
    2.     .rcv    = br_stp_rcv,
    3. };


  6.     if (err < 0) {
  7.         printk(KERN_ERR "bridge: can't register sap for STP\n");
  8.         return err;
  9.     }

  10.     err = br_fdb_init();     // 创建fdb 缓冲区
  11.     if (err)
  12.         goto err_out;

  13.     err = register_pernet_subsys(&br_net_ops); //注册桥子系统,主要是网络命名空间.
  14.     if (err)
  15.         goto err_out1;

  16.     err = br_netfilter_init();  //netfilter桥部分的初始化,ebtables工具配置使用.
  17.    

    点击(此处)折叠或打开

    1. int __init br_netfilter_init(void)
    2. {
    3.     int ret;

    4.     ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
    5.     if (ret < 0)
    6.         return ret;
    7. #ifdef CONFIG_SYSCTL
    8.     brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
    9.     if (brnf_sysctl_header == NULL) {
    10.         printk(KERN_WARNING
    11.          "br_netfilter: can't register to sysctl.\n");
    12.         nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
    13.         return -ENOMEM;
    14.     }
    15. #endif
    16.     printk(KERN_NOTICE "Bridge firewalling registered\n");
    17.     return 0;
    18. }

  18.     if (err)
  19.         goto err_out2;

  20.     err = register_netdevice_notifier(&br_device_notifier);   //注册桥设备关心的通知链
  21.     if (err)
  22.         goto err_out3;

  23.     err = br_netlink_init();    //  netlink 
  24.     if (err)
  25.         goto err_out4;

  26.     brioctl_set(br_ioctl_deviceless_stub);   //  ioctl  socket  netlink相关的 不是设备本身的ioctl
  27.     br_handle_frame_hook = br_handle_frame;

  28. #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
  29.     br_fdb_test_addr_hook = br_fdb_test_addr;
  30. #endif

  31.     return 0;
  32. err_out4:
  33.     unregister_netdevice_notifier(&br_device_notifier);
  34. err_out3:
  35.     br_netfilter_fini();
  36. err_out2:
  37.     unregister_pernet_subsys(&br_net_ops);
  38. err_out1:
  39.     br_fdb_fini();
  40. err_out:
  41.     stp_proto_unregister(&br_stp_proto);
  42.     return err;
  43. }
这里没有分析什么,初始化的部分内容不是很多,但是都很重要.
下面分析一下桥的创建以及添加接口. 在br_if.c中

点击(此处)折叠或打开

  1. int br_add_bridge(struct net *net, const char *name)
  2. {
  3.     struct net_device *dev;
  4.     int ret;

  5.     dev = new_bridge_dev(net, name); //这个函数的精华部分 ,申请netdevice,并初始化
  6.     if (!dev)
  7.         return -ENOMEM;

  8.     rtnl_lock();
  9.     if (strchr(dev->name, '%')) {
  10.         ret = dev_alloc_name(dev, dev->name);   //申请名字
  11.         if (ret < 0)
  12.             goto out_free;
  13.     }

  14.     SET_NETDEV_DEVTYPE(dev, &br_type);

  15.     ret = register_netdevice(dev);   //注册设备
  16.     if (ret)
  17.         goto out_free;

  18.     ret = br_sysfs_addbr(dev);  // sysfs文件系统相关的
  19.     if (ret)
  20.         unregister_netdevice(dev);
  21.  out:
  22.     rtnl_unlock();
  23.     return ret;

  24. out_free:
  25.     free_netdev(dev);
  26.     goto out;
  27. }
我们看new_bridge_dev

点击(此处)折叠或打开

  1. static struct net_device *new_bridge_dev(struct net *net, const char *name)
  2. {
  3.     struct net_bridge *br;
  4.     struct net_device *dev;

  5.     dev = alloc_netdev(sizeof(struct net_bridge), name,    //申请设备,并br_dev_setup初始化 (模式和普通的网卡初始化没什么区别)

  6. 点击(此处)折叠或打开

    1. void br_dev_setup(struct net_device *dev)
    2. {
    3.     random_ether_addr(dev->dev_addr);
    4.     ether_setup(dev);

    5.     dev->netdev_ops = &br_netdev_ops;    //初始化设备的桥操作
    6.     dev->destructor = free_netdev;
    7.     SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
    8.     dev->tx_queue_len = 0;
    9.     dev->priv_flags = IFF_EBRIDGE;

    10.     dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
    11.             NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
    12.             NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
    13. }


  7.              br_dev_setup);

  8.     if (!dev)
  9.         return NULL;
  10.     dev_net_set(dev, net);   //网络命名空间

  11.     br = netdev_priv(dev);   //   br结构指向了netdev私有空间
  12.     br->dev = dev;

  13.     spin_lock_init(&br->lock);
  14.     INIT_LIST_HEAD(&br->port_list);
  15.     spin_lock_init(&br->hash_lock);

  16.     br->bridge_id.prio[0] = 0x80;   //桥优先级默认0x8000
  17.     br->bridge_id.prio[1] = 0x00;

  18.     memcpy(br->group_addr, br_group_address, ETH_ALEN);

  19.     br->feature_mask = dev->features;                 //对br的初始化工作
  20.     br->stp_enabled = BR_NO_STP;                       //默认stp关闭
  21.     br->designated_root = br->bridge_id;               
  22.     br->root_path_cost = 0;
  23.     br->root_port = 0;                                   //根port默认为0 
  24.     br->bridge_max_age = br->max_age = 20 * HZ;
  25.     br->bridge_hello_time = br->hello_time = 2 * HZ;
  26.     br->bridge_forward_delay = br->forward_delay = 15 * HZ;
  27.     br->topology_change = 0;
  28.     br->topology_change_detected = 0;
  29.     br->ageing_time = 300 * HZ;

  30.     br_netfilter_rtable_init(br);   //初始化桥路由信息,pmtu.可以看这个函数的注释

  31. 点击(此处)折叠或打开

    1. /*
    2.  * Initialize bogus route table used to keep netfilter happy.
    3.  * Currently, we fill in the PMTU entry because netfilter
    4.  * refragmentation needs it, and the rt_flags entry because
    5.  * ipt_REJECT needs it. Future netfilter modules might
    6.  * require us to fill additional fields.
    7.  */
    8. void br_netfilter_rtable_init(struct net_bridge *br)
    9. {
    10.     struct rtable *rt = &br->fake_rtable;

    11.     atomic_set(&rt->u.dst.__refcnt, 1);
    12.     rt->u.dst.dev = br->dev;
    13.     rt->u.dst.path = &rt->u.dst;
    14.     rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
    15.     rt->u.dst.flags    = DST_NOXFRM;
    16.     rt->u.dst.ops = &fake_dst_ops;
    17. }



  32.     INIT_LIST_HEAD(&br->age_list);   //初始化老化时间链表

  33.     br_stp_timer_init(br);      //stp相关的几个timer 4个:hello_timer、tcn_timer、topology_change_timer、gc_timer

  34. 点击(此处)折叠或打开

    1. void br_stp_timer_init(struct net_bridge *br)
    2. {
    3.     setup_timer(&br->hello_timer, br_hello_timer_expired,
    4.          (unsigned long) br);

    5.     setup_timer(&br->tcn_timer, br_tcn_timer_expired,
    6.          (unsigned long) br);

    7.     setup_timer(&br->topology_change_timer,
    8.          br_topology_change_timer_expired,
    9.          (unsigned long) br);

    10.     setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
    11. }


  35.     return dev;
  36. }
这里简单说下定时器的作用:它主要跟stp相关
hello timer:用于定期产生配置bpdu。只有根网桥可以使用该定时器
tcn    timer:由检测到拓扑变化而且必须通知根网桥的网桥使用.
Topology  change  timer: (拓扑变化 TC)
由根网桥使用,以便记住要在其配置bpdu中设定一个特殊标示。此标示用于将拓扑变化通知其他网桥(非根)
Aging 定时器 (gc timer)
用于从转发数据库中清除无效的地址。该定时器由网桥使用,而不论其是否启用stp
我们继续看看往桥里添加一个接口:
关于这个函数的调用,可以参考brctl 命令的使用 例如:brctl addif  br0  eth0 
这里为什么要说呢,主要刚才我们注册br其实也是netdevice设备,那么struct net_bridge呢?看下ioctl的具体操作就明白了。

点击(此处)折叠或打开

  1. int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
  2. {
  3.     struct net_bridge *br = netdev_priv(dev);

  4.     switch(cmd) {
  5.     case SIOCDEVPRIVATE:
  6.         return old_dev_ioctl(dev, rq, cmd);

  7.     case SIOCBRADDIF:
  8.     case SIOCBRDELIF:
  9.         return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);

  10.     }

  11.     pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
  12.     return -EOPNOTSUPP;
  13. }
添加桥端口函数:br_add_if

点击(此处)折叠或打开

  1. /* called with RTNL */
  2. int br_add_if(struct net_bridge *br, struct net_device *dev)
  3. {
  4.     struct net_bridge_port *p;
  5.     int err = 0;

  6.     /* Don't allow bridging non-ethernet like devices */    //非以太网类型的设备退出
  7.     if ((dev->flags & IFF_LOOPBACK) ||
  8.      dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
  9.         return -EINVAL;

  10.     /* No bridging of bridges */
  11.     if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)   //桥设备不能再加入桥
  12.         return -ELOOP;

  13.     /* Device is already being bridged */     //已经属于桥的端口不能直接加入另外一个桥里
  14.     if (dev->br_port != NULL)
  15.         return -EBUSY;

  16.     p = new_nbp(br, dev);        // 初始化桥端口,和端口状态信息默认为blocking 且为指定端口    并且初始化port timer          
  17.     if (IS_ERR(p))
  18.         return PTR_ERR(p);

  19.     err = dev_set_promiscuity(dev, 1);
  20.     if (err)
  21.         goto put_back;

  22.     err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
  23.                  SYSFS_BRIDGE_PORT_ATTR);
  24.     if (err)
  25.         goto err0;

  26.     err = br_fdb_insert(br, p, dev->dev_addr);   // 添加到fdb里 mac--port
  27.     if (err)
  28.         goto err1;

  29.     err = br_sysfs_addif(p);
  30.     if (err)
  31.         goto err2;

  32.     rcu_assign_pointer(dev->br_port, p);
  33.     dev_disable_lro(dev);

  34.     list_add_rcu(&p->list, &br->port_list);  // 把port添加到br的port_list

  35.     spin_lock_bh(&br->lock);
  36.     br_stp_recalculate_bridge_id(br);
  37.     br_features_recompute(br);

  38.     if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
  39.      (br->dev->flags & IFF_UP))
  40.         br_stp_enable_port(p);    // 这里port状态为转发 根据具体情况
  41.     spin_unlock_bh(&br->lock);

  42.     br_ifinfo_notify(RTM_NEWLINK, p);

  43.     dev_set_mtu(br->dev, br_min_mtu(br));  //设置mtu

  44.     kobject_uevent(&p->kobj, KOBJ_ADD);

  45.     return 0;
  46. err2:
  47.     br_fdb_delete_by_port(br, p, 1);
  48. err1:
  49.     kobject_put(&p->kobj);
  50.     p = NULL; /* kobject_put frees */
  51. err0:
  52.     dev_set_promiscuity(dev, -1);
  53. put_back:
  54.     dev_put(dev);
  55.     kfree(p);
  56.     return err;
  57. }


图为桥和端口和fdb entry以及netdevice间的关系.
这里说一下port timers:

点击(此处)折叠或打开

  1. void br_stp_port_timer_init(struct net_bridge_port *p)
  2. {
  3.     setup_timer(&p->message_age_timer, br_message_age_timer_expired,
  4.          (unsigned long) p);

  5.     setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
  6.          (unsigned long) p);

  7.     setup_timer(&p->hold_timer, br_hold_timer_expired,
  8.          (unsigned long) p);
  9. }
message_age_timer: 由于bpdu携带的信息生存期是有限的,它用于强制执行这个生存期 限于收到bpdu数据

消息生存期  >  最大值  :丢弃

消息生存期  ≤  最大值  : 启动Message Age定时器.(时间为差值)

forward_delay_timer:负责状态转移,机制如图

hold_timer:限制端口bpdu的发送速率

我们这里看一下br_make_forwarding函数:

点击(此处)折叠或打开

  1. /* called under bridge lock */
  2. static void br_make_forwarding(struct net_bridge_port *p)
  3. {
  4.     struct net_bridge *br = p->br;

  5.     if (p->state != BR_STATE_BLOCKING)
  6.         return;

  7.     if (br->forward_delay == 0) {
  8.         p->state = BR_STATE_FORWARDING;
  9.         br_topology_change_detection(br);
  10.         del_timer(&p->forward_delay_timer);
  11.     }
  12.     else if (p->br->stp_enabled == BR_KERNEL_STP)   //默认没有开启
  13.         p->state = BR_STATE_LISTENING;
  14.     else
  15.         p->state = BR_STATE_LEARNING;   //正常情况下

  16.     br_log_state(p);

  17.     if (br->forward_delay != 0)
  18.         mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
  19. }
这里br->forward_delay肯定不为0,见桥创建函数:

点击(此处)折叠或打开

  1. br->bridge_forward_delay = br->forward_delay = 15 * HZ;
而stp默认是没有开启的,所以最后的状态是BR_STATE_LEARNING,也就是说刚把端口加入桥的时候,在port timer(forward_delay_timer)到期之前,端口处于学习状态;定时器到期后会触发下面的处理函数:

点击(此处)折叠或打开

  1. static void br_forward_delay_timer_expired(unsigned long arg)
  2. {
  3.     struct net_bridge_port *p = (struct net_bridge_port *) arg;
  4.     struct net_bridge *br = p->br;

  5.     pr_debug("%s: %d(%s) forward delay timer\n",
  6.          br->dev->name, p->port_no, p->dev->name);
  7.     spin_lock(&br->lock);
  8.     if (p->state == BR_STATE_LISTENING) {
  9.         p->state = BR_STATE_LEARNING;
  10.         mod_timer(&p->forward_delay_timer,
  11.              jiffies + br->forward_delay);
  12.     } else if (p->state == BR_STATE_LEARNING) {
  13.         p->state = BR_STATE_FORWARDING;
  14.         if (br_is_designated_for_some_port(br))
  15.             br_topology_change_detection(br);
  16.     }
  17.     br_log_state(p);
  18.     spin_unlock(&br->lock);
  19. }
它把端口状态设置为了转发态. 

关于桥下环路问题 即生成树协议,这里不做讨论 . 有兴趣的可以参考《深入理解linux网络内幕》第十五章生成树协议,以及参考相关的rfc.

新版生成树协议:

Rstp    802.1w

Mstp    802.1s  针对多个vlan

当然这里说的还是比较粗略,只是作为一个深入学习的引子.





网友评论

登录后评论
0/500
评论
云之大者
+ 关注