AC自动机 - 多模式串的匹配运用 --- HDU 3065

2014-09-30 923

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介： 病毒侵袭持续中 Problem's Link：http://acm.hdu.edu.cn/showproblem.php?pid=3065 Mean: 略 analyse: AC自动机的运用. 这一题需要将模式串都存储下来，还有就是base的取值一定要弄清楚，由于这题的模式串都是大写字母所以我们可以通过剪枝来加速。

病毒侵袭持续中

Problem's Link：http://acm.hdu.edu.cn/showproblem.php?pid=3065

Mean:

略

analyse:

AC自动机的运用.

这一题需要将模式串都存储下来，还有就是base的取值一定要弄清楚，由于这题的模式串都是大写字母所以我们可以通过剪枝来加速。

Time complexity：o(n)+o(ml)

Source code：

 
 // Memory   Time
 

 // 1347K     0MS
 

 // by : Snarl_jsb
 

 // 2014-09-30-21.00
 

 #include<algorithm>
 

 #include<cstdio>
 

 #include<cstring>
 

 #include<cstdlib>
 

 #include<iostream>
 

 #include<vector>
 

 #include<queue>
 

 #include<stack>
 

 #include<map>
 

 #include<string>
 

 #include<climits>
 

 #include<cmath>
 

 #define LL long long
 

 using 
 namespace 
 std;
 

 

 char 
 backup
 [
 1002
 ][
 53
 ];
 

 int 
 res
 [
 1002
 ];
 

 const 
 int N 
 = 
 1010;
 

 char 
 str
 [
 2000010
 ];
 

 struct 
 node
 

 {
 
    
 node 
 *
 next
 [
 26
 ];     
 //  每个结点都对应26个字母的指针
 
    
 node 
 *
 fail;     
 //      失配指针
 
    
 int 
 count;      
 //
 
    
 int 
 num;
 
    
 node()      
 //  构造函数初始化
 
    
 {
 
        
 for(
 int 
 i 
 = 
 0; 
 i 
 < 
 26; 
 i
 ++)
 
            
 next
 [
 i
 ] 
 = 
 NULL;
 
        
 count 
 = 
 0;
 
        
 num
 =
 0;
 
        
 fail 
 = 
 NULL;
 
    
 }
 

 }
 *
 q
 [
 50
 *N
 ];
 

 node 
 *
 root;
 

 int 
 head
 , 
 tail;
 

 

 void 
 Insert(
 char 
 *
 str
 ,
 int 
 num) 
 //   插入单词.相当于构建一个Trie树
 

 {
 
    
 node 
 *p 
 = 
 root;
 
    
 int 
 i 
 = 
 0
 , 
 index;
 
    
 while(
 str
 [
 i
 ])
 
    
 {
 
        
 index 
 = 
 str
 [
 i
 ] 
 - 
 'A'; 
 //  转化为相对数字来存
 
        
 if(p
 ->
 next
 [
 index
 ] 
 == 
 NULL) 
 // 该字母未插入过
 
            p
 ->
 next
 [
 index
 ] 
 = 
 new 
 node();    
 //  为该字母申请一个结点
 
        p 
 = p
 ->
 next
 [
 index
 ];     
 //   移至下一个
 
        
 i
 ++;
 
    
 }
 
    p
 ->
 count
 ++;     
 //      记录该结点的单词总共插入的次数
 
    p
 ->
 num
 =
 num;
 

 }
 

 void 
 build_ac_automation(
 node 
 *
 root)        
 //      bfs建立fail指针
 

 {
 
    
 root
 ->
 fail 
 = 
 NULL;
 
    
 q
 [
 tail
 ++
 ] 
 = 
 root;
 
    
 while(
 head 
 < 
 tail) 
 {
 
        
 node 
 *
 temp 
 = 
 q
 [
 head
 ++
 ];
 
        
 node 
 *p 
 = 
 NULL;
 
        
 for(
 int 
 i 
 = 
 0; 
 i 
 < 
 26; 
 i
 ++) 
 {
 
            
 if(
 temp
 ->
 next
 [
 i
 ] 
 != 
 NULL) 
 {
 
                
 if(
 temp 
 == 
 root) 
 temp
 ->
 next
 [
 i
 ]
 ->
 fail 
 = 
 root;
 
                
 else 
 {
 
                    p 
 = 
 temp
 ->
 fail;
 
                    
 while(p 
 != 
 NULL) 
 {
 
                        
 if(p
 ->
 next
 [
 i
 ] 
 != 
 NULL) 
 {
 
                            
 temp
 ->
 next
 [
 i
 ]
 ->
 fail 
 = p
 ->
 next
 [
 i
 ];
 
                            
 break;
 
                        
 }
 
                        p 
 = p
 ->
 fail;
 
                    
 }
 
                    
 if(p 
 == 
 NULL) 
 temp
 ->
 next
 [
 i
 ]
 ->
 fail 
 = 
 root;
 
                
 }
 
                
 q
 [
 tail
 ++
 ] 
 = 
 temp
 ->
 next
 [
 i
 ];
 
            
 }
 
        
 }
 
    
 }
 

 }
 

 

 int 
 Query(
 node 
 *
 root)       
 //  匹配 + 统计
 

 {
 
    
 int 
 i 
 = 
 0
 , 
 cnt 
 = 
 0
 , 
 index;
 
    
 node 
 *p 
 = 
 root;
 
    
 while(
 str
 [
 i
 ])
 
    
 {
 
        
 index 
 = 
 str
 [
 i
 ] 
 - 
 'A';
 
        
 if(
 index
 <
 0||
 index
 >
 25)   
 ///这个地方要特别注意，由于病毒只包含大写字母，所以这儿需要剪枝，不剪枝的话其他地方加判断也可以过
 
        
 {
 
            p
 =
 root;
 
            
 i
 ++;
 
            
 continue;
 
        
 }
 
        
 while(p
 ->
 next
 [
 index
 ] 
 == 
 NULL 
 && p 
 != 
 root) 
 //前缀是相同的，所以不管哪个指针走到了count不为0的结点上，那么该结点所代表的单词就匹配成功
 
            p 
 = p
 ->
 fail;
 //失配情况下，p指针指向p->fail.(相当于KMP的next数组)
 
        p 
 = p
 ->
 next
 [
 index
 ];
 //由于现在所在的位置是父节点，所以需要向下移动一个位置
 
        
 if(p 
 == 
 NULL)
 
            p 
 = 
 root; 
 //如果匹配失败，移动到root，重新开始匹配
 
        
 node 
 *
 temp 
 = p;
 //
 
        
 while(
 temp 
 != 
 root 
 && 
 temp
 ->
 count
 >
 0)  
 //统计--如果匹配成功,那么count>1，表示该结点代表的单词数量;否则表示该结点没有单词
 
        
 {
 

 //            cnt += temp->count; //统计该单词出现的次数
 
            
 res
 [
 temp
 ->
 num
 ]
 ++;   
 //每次回溯都会加1
 

 //            temp->count = -1;   //!!!!!!!!!!!!!!!!!(如果要重复统计，请讲这句去掉)!!!!!!!!标记为-1，表示该单词已经加入了cnt中
 
            
 temp 
 = 
 temp
 ->
 fail;
 //判断整条链上的匹配情况
 
        
 }
 
        
 i
 ++;
 
    
 }
 
    
 return 
 cnt;
 

 }
 

 

 int 
 main()
 

 {
 
    
 int n
 ,
 m;
 
    
 while(
 cin
 >>n)
 
    
 {
 
        
 head 
 = 
 tail 
 = 
 0;    
 //  清零
 
        
 root 
 = 
 new 
 node();      
 //  申请新的root结点
 
        
 memset(
 backup
 ,
 0
 ,
 sizeof(
 backup));
 
        
 memset(
 res
 ,
 0
 ,
 sizeof(
 res));
 
        
 for(
 int 
 i
 =
 1;
 i
 <=n;
 ++
 i)
 
        
 {
 
            
 scanf(
 "%s"
 ,
 str);
 
            
 strcpy(
 backup
 [
 i
 ],
 str);
 
            
 Insert(
 str
 ,
 i);
 
        
 }
 
        
 build_ac_automation(
 root);
 
        
 scanf(
 "%s"
 ,
 str);
 
        
 Query(
 root);
 
        
 for(
 int 
 i
 =
 1;
 i
 <=n;
 ++
 i)
 
        
 {
 
            
 if(
 res
 [
 i
 ])
 
            
 {
 
                
 printf(
 "%s: %d
 \n
 "
 ,
 backup
 [
 i
 ],
 res
 [
 i
 ]);
 
            
 }
 
        
 }
 
    
 }
 
    
 return 
 0;
 

 } 

AC自动机 - 多模式串的匹配运用 --- HDU 3065

病毒侵袭持续中

Problem's Link：http://acm.hdu.edu.cn/showproblem.php?pid=3065

热门文章

最新文章

相关电子书