using System;
using System.Text.RegularExpressions;
public static class HtmlRemoval
{
public static string StripTagsRegex(string source)
{
return Regex.Replace(source, "<.*?>", string.Empty);
}
static Regex _htmlRegex = new Regex("<.*?>", RegexOptions.Compiled);
public static string StripTagsRegexCompiled(string source)
{
return _htmlRegex.Replace(source, string.Empty);
}
public static string StripTagsCharArray(string source)
{
char[] array = new char[source.Length];
int arrayIndex = 0;
bool inside = false;
for (int i = 0; i < source.Length; i++)
{
char let = source[i];
if (let == '<')
{
inside = true;
continue;
}
if (let == '>')
{
inside = false;
continue;
}
if (!inside)
{
array[arrayIndex] = let;
arrayIndex++;
}
}
return new string(array, 0, arrayIndex);
}
}
using System;
using System.Text.RegularExpressions;
class Program
{
static void Main()
{
const string html = "<p>There was a .NET programmer " +
"and he stripped the HTML tags.</p>";
Console.WriteLine(HtmlRemoval.StripTagsRegex(html));
Console.WriteLine(HtmlRemoval.StripTagsRegexCompiled(html));
Console.WriteLine(HtmlRemoval.StripTagsCharArray(html));
}
}
This gives
<p>There was a <b>.NET</b> programmer " +
"and he stripped the <i>HTML</i> tags.</p>
There was a .NET programmer and he stripped the HTML tags.