有一个文本文件,内容如下: ....... absolute ['1bs4lu:t] absorb [4b's3:b] abstract ['1bstr1kt] ....... 方括号中的内容本应是音标,但无法正常显示,因此希望将其删除。文件中总共有 5000 多个单词,下面用 C# 正则表达式来处理,代码如下:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace Test
{
    /// <summary>
    /// Console tool that removes bracketed text such as <c>['1bs4lu:t]</c> from a word list file.
    /// </summary>
    class Program
    {
        // Matches a single "[...]" group. The negated character class stops at the first ']'
        // so that several groups on one line are removed individually (a greedy ".*" would
        // swallow everything between the first '[' and the last ']' on the line), and it
        // excludes line breaks so a stray '[' never consumes text from following lines.
        private static readonly Regex BracketedText = new Regex(@"\[[^\]\r\n]*\]");

        /// <summary>
        /// Reads c:\1.txt, removes every bracketed group, and writes the result to c:\2.txt.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Specifying the character encoding matters; without it the text comes out garbled.
            // Creating the streams inside the using headers guarantees the reader is disposed
            // even when opening the writer fails.
            using (StreamReader sr = new StreamReader("c:\\1.txt", Encoding.Default))
            using (StreamWriter sw = new StreamWriter("c:\\2.txt", false, Encoding.Default))
            {
                string temp = sr.ReadToEnd();
                sw.Write(RemoveBracketedText(temp));
            }

            // Usage of Regex Match / Group / Capture (kept for reference):
            //string text = "One car red car blue car";
            //string pat = @"(?<1>\w+)\s+(?<2>car)\s*";
            //// Compile the regular expression.
            //Regex r = new Regex(pat, RegexOptions.IgnoreCase);
            //// Match the regular expression pattern against a text string.
            //Match m = r.Match(text);
            //while (m.Success)
            //{
            //    // Display the first match and its capture set.
            //    System.Console.WriteLine("Match=[" + m + "]");
            //    CaptureCollection cc = m.Captures;
            //    foreach (Capture c in cc)
            //    {
            //        System.Console.WriteLine("Capture=[" + c + "]");
            //    }
            //    // Display Group1 and its capture set.
            //    Group g1 = m.Groups[1];
            //    System.Console.WriteLine("Group1=[" + g1 + "]");
            //    foreach (Capture c1 in g1.Captures)
            //    {
            //        System.Console.WriteLine("Capture1=[" + c1 + "]");
            //    }
            //    // Display Group2 and its capture set.
            //    Group g2 = m.Groups[2];
            //    System.Console.WriteLine("Group2=[" + g2 + "]");
            //    foreach (Capture c2 in g2.Captures)
            //    {
            //        System.Console.WriteLine("Capture2=[" + c2 + "]");
            //    }
            //    // Advance to the next match.
            //    m = m.NextMatch();
            //}
        }

        /// <summary>
        /// Returns <paramref name="text"/> with every "[...]" group removed.
        /// </summary>
        /// <param name="text">The text to clean.</param>
        /// <returns>The input text without its bracketed groups.</returns>
        private static string RemoveBracketedText(string text)
        {
            return BracketedText.Replace(text, "");
        }
    }
}

1、读写文本文件的时候必须要设置字符编码,否则中文会出现乱码;
2、方括号中的内容用 [^\]\r\n]* 匹配,而不要用贪婪的 .*,否则同一行有多个方括号时,会把它们之间的单词一并删掉;RegexOptions.Multiline(多行模式)只影响 ^ 和 $ 的匹配位置,对这个模式没有作用,因此不需要设置。

本文转自左洸博客园博客,原文链接:http://www.cnblogs.com/myqiao/archive/2007/07/29/835176.html,如需转载请自行联系原作者
|