|
I have modified a bit of this code so that it will not replace the src attribute in and auto hyperlink the keyword with a target url. When I put in keyword "happy birthday" then it will find this word and autohyperlink to http://happybirthday.com, in the same string then I put in keyword "birthday" then it will become 2 hyperlinks.
So how can I ignore birthday if happy birthday has been hyperlinked and still keep on hyperlink those birthday keyword which does not contain "happy birthday"?
protected string AutoHyperlinks(string strvar, string param, string keyword, string targeturl)
{
// (c)2006 Michael Argentini, http://www.nonsequiturs.com.
//
// Please keep this copyright intact.
// You may use or modify this code however you see fit,
// within the scope of application or web site functionality.
// Distribution of this code as an example or snippet is
// prohibited. In this case, please link to the code example
// on the nonsequiturs.com site directly!
// First, process all <nolink> areas and change period
// characters temporarily to avoid auto-hyperlink processing.
string final = strvar;
Regex regex = new Regex(@"<nolink>(.*?)</nolink>",
RegexOptions.IgnoreCase | RegexOptions.Singleline |
RegexOptions.CultureInvariant |
RegexOptions.IgnorePatternWhitespace |
RegexOptions.Compiled);
MatchCollection theMatches = regex.Matches(strvar);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(),
theMatches[index].ToString().Replace(".", "[[[pk:period]]]"));
}
// Second, process all existing <a> tags and change period
// characters in them temporarily to avoid auto-hyperlink processing.
regex = new Regex(@"<a(.*?)</a>", RegexOptions.IgnoreCase |
RegexOptions.Singleline | RegexOptions.CultureInvariant |
RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(),
theMatches[index].ToString().Replace(".", "[[[pk:period]]]"));
}
//ken
regex = new Regex(@"<IMG(.*?)>", RegexOptions.IgnoreCase |
RegexOptions.Singleline | RegexOptions.CultureInvariant |
RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(),
theMatches[index].ToString().Replace(".", "[[[pk:period]]]"));
}
// Third, temporarily alter any digit sequences
// that are formatted like domain names.
final = Regex.Replace(final, @"(?<=\d)\.(?=\d)", "[[[pk:period]]]");
string linkedURL=string.Format("<a href=\"{0}\">{1}</a>", targeturl.Replace(".", "[[[pk:period]]]"), keyword);
//ken
if (final.Contains(linkedURL))
{
final = final.Replace(linkedURL, "[[[pk:linked]]]");
}
// Fourth, look for, and process, any linkable domain names or URLs.
//Regex tags = new Regex(@"([a-zA-Z0-9\:/\-]*[a-zA-Z0-9\-_]\" +
//@".[a-zA-Z0-9\-_][a-zA-Z0-9\-_][a-zA-Z0-9\?\" +
//@"=&#_\-/\.]*[^<>,;\.\s\)\(\]\[\""])");
Regex tags = new Regex(String.Format(CultureInfo.CurrentCulture, @"\b{0}", keyword, RegexOptions.IgnoreCase));
// Fifth, fix any inadvertently altered protocol strings.
//final = tags.Replace(final, "<a href=\"http://$&\"" + param + ">$&</a>");
final = tags.Replace(final, string.Format(CultureInfo.CurrentCulture, "<a href=\"{0}\">{1}</a>", targeturl, keyword, 1));
final = final.Replace("http://https://", "https://");
final = final.Replace("http://http://", "http://");
final = final.Replace("http://ftp://", "ftp://");
final = final.Replace("http://rtsp://", "rtsp://");
final = final.Replace("http://mms://", "mms://");
final = final.Replace("http://pcast://", "pcast://");
final = final.Replace("http://sftp://", "sftp://");
final = final.Replace("[[[pk:period]]]", ".");
final = final.Replace("<nolink>", "");
final = final.Replace("</nolink>", "");
final = final.Replace("[[[pk:linked]]]", string.Format("<a href=\"{0}\">{1}</a>",targeturl, keyword));
// Lastly, return the processed string.
return final;
}
modified on Friday, June 26, 2009 4:34 AM
|
|
|
|
|
For those who haven't followed the previous thread concerning email addresses being linkable and a bug that occurs when a link appears more than once in your source string, this new version fixes those two issues, and has no known bugs at this time. Hope you like it. Please vote for the function (top of page) and give it a good rating!
/// <summary>
/// Convert domain names and url paths to real web links, as well as email addresses to "mailto" links.
/// Existing <a> tags are ignored. Any linkable items within a <nolink></nolink> region are ignored.
/// </summary>
/// <param name="strvar">String to process.</param>
/// <param name="param">String of parameters to insert into the resultant <a> tags, like target="_blank".</param>
/// <returns>Passed "strvar" string with links added</returns>
public static string AutoHyperlinks(string strvar, string param)
{
// (c)2006 Michael Argentini, http://www.nonsequiturs.com.
//
// Please keep this copyright intact.
// You may use or modify this code however you see fit,
// within the scope of application or web site functionality.
// Distribution of this code as an example or snippet is
// prohibited. In this case, please link to the code example
// on the nonsequiturs.com site directly!
string final = strvar;
string section = String.Empty;
Regex regex;
MatchCollection theMatches;
// Switch out periods within a <nolink> region to prevent processing
if(InStr(final, "<nolink>") > 0)
{
regex = new Regex(@"<nolink>(.*?)</nolink>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(), theMatches[index].ToString().Replace(".", "[pk:period]"));
}
}
// Make email addresses mailto links
if(InStr(final, "@") > 0)
{
regex = new Regex(@"([a-zA-Z_0-9.-]+\@[a-zA-Z_0-9.-]+\.\w+)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(), "<a href=\"mailto:" + theMatches[index].ToString() + "\">" + theMatches[index].ToString() + "</a>");
}
}
// Switch out periods within a <a> region to prevent processing
if(InStr(final, "<a") > 0)
{
regex = new Regex(@"<a(.*?)</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(), theMatches[index].ToString().Replace(".", "[pk:period]"));
}
}
// Switch out periods within numeric values that appear to be valid domain names
final = Regex.Replace(final, @"(?<=\d)\.(?=\d)", "[pk:period]");
// Identify all potential URLs and domain names and make them hyperlinks
Regex tags = new Regex(@"([a-zA-Z\:/]*[a-zA-Z_0-9.-]+\.[a-zA-Z]{2,}[a-zA-Z0-9\?\=&#_\-/\.]*[^<>,;\.\s\)\(\]\[\""])", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = tags.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
section = theMatches[index].ToString();
if(InStr(section, "://") < 1) section = "http://" + section;
final = final.Replace(theMatches[index].ToString(), "<a href=\"" + section.Replace(".", "[pk:period]") + "\" " + param + ">" + theMatches[index].ToString().Replace(".", "[pk:period]") + "</a>");
}
// Clear out escape sequences and <nolink></nolink> tags
final = final.Replace("[pk:period]", ".");
final = final.Replace("<nolink>", "");
final = final.Replace("</nolink>", "");
return final;
}
|
|
|
|
|
Replace the appropriate code snippet with this one. I found a quirk with identifying domain names where textual phrases like "Cake.....yum." were identified as linkable.
REPLACE THIS LINE:
Regex tags = new Regex(@"([a-zA-Z\:/]*[a-zA-Z_0-9.-]+\.[a-zA-Z]{2,}[a-zA-Z0-9\?\=&#_\-/\.]*[^<>,;\.\s\)\(\]\[\""])", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
WITH THIS LINE:
Regex tags = new Regex(@"([a-zA-Z0-9\:/\-]*[a-zA-Z0-9\-_]\.[a-zA-Z0-9\-_][a-zA-Z0-9\-_][a-zA-Z0-9\?\=&#_\-/\.]*[^<>,;\.\s\)\(\]\[\""])", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
I had previously streamlined my existing code (which worked really well) to make the regex string smaller and perhaps a bit faster. It introduced the previous bug. The replacement (above) uses my prior regex string, with which I haven't had a problem.
|
|
|
|
|
As regular expressions are still a mystery to me (in fact I don't believe they work at all - I think they are a secret code that redirects all such expressions to a colony of Irish leprechuans on an uninhabited island off the west coast of that Emerald Isle who do the work on abaci before returning the result...) I wonder if you could extend the code to also hyperlink email addresses....
Good stuff..
cheers
Fred
|
|
|
|
|
Here's an updated version of the code, complete with email address processing. Visit my site if you want to get the latest versions of the function (http://www.nonsequiturs.com)
/// <summary>
/// Convert domain names and url paths to real web links, as well as email addresses to "mailto" links.
/// Existing <a> tags are ignored. Any linkable items within a <nolink></nolink> region are ignored.
/// </summary>
/// <param name="strvar">String to process.</param>
/// <param name="param">String of parameters to insert into the resultant <a> tags, like target="_blank".</param>
/// <returns>Passed "strvar" string with links added</returns>
public static string AutoHyperlinks(string strvar, string param)
{
// (c)2006 Michael Argentini, http://www.nonsequiturs.com.
//
// Please keep this copyright intact.
// You may use or modify this code however you see fit,
// within the scope of application or web site functionality.
// Distribution of this code as an example or snippet is
// prohibited. In this case, please link to the code example
// on the nonsequiturs.com site directly!
string final = strvar;
string section = String.Empty;
Regex regex;
MatchCollection theMatches;
// Switch out periods within a <nolink> region to prevent processing
if(InStr(final, "<nolink>") > 0)
{
regex = new Regex(@"<nolink>(.*?)</nolink>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(), theMatches[index].ToString().Replace(".", "[pk:period]"));
}
}
// Make email addresses mailto links
if(InStr(final, "@") > 0)
{
regex = new Regex(@"([a-zA-Z_0-9.-]+\@[a-zA-Z_0-9.-]+\.\w+)", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(), "<a href=\"mailto:" + theMatches[index].ToString() + "\">" + theMatches[index].ToString() + "</a>");
}
}
// Switch out periods within a <a> region to prevent processing
if(InStr(final, "<a") > 0)
{
regex = new Regex(@"<a(.*?)</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = regex.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
final = final.Replace(theMatches[index].ToString(), theMatches[index].ToString().Replace(".", "[pk:period]"));
}
}
// Switch out periods within numeric values that appear to be valid domain names
final = Regex.Replace(final, @"(?<=\d)\.(?=\d)", "[pk:period]");
// Identify all potential URLs and domain names and make them hyperlinks
Regex tags = new Regex(@"([a-zA-Z\:/]*[a-zA-Z_0-9.-]+\.\w+[a-zA-Z0-9\?\=&#_\-/\.]*[^<>,;\.\s\)\(\]\[\""])", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
theMatches = tags.Matches(final);
for (int index = 0; index < theMatches.Count; index++)
{
section = theMatches[index].ToString();
if(InStr(section, "://") < 1) section = "http://" + section;
final = final.Replace(theMatches[index].ToString(), "<a href=\"" + section + "\" " + param + ">" + theMatches[index].ToString() + "</a>");
}
// Clear out escape sequences and <nolink></nolink> tags
final = final.Replace("[pk:period]", ".");
final = final.Replace("<nolink>", "");
final = final.Replace("</nolink>", "");
return final;
}
|
|
|
|
|
The previous comment contains a new version of the code. Swap out the appropriate line in that code with the following line (subtle change)
Regex tags = new Regex(@"([a-zA-Z\:/]*[a-zA-Z_0-9.-]+\.[a-zA-Z]{2,}[a-zA-Z0-9\?\=&#_\-/\.]*[^<>,;\.\s\)\(\]\[\""])", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
This is in the section where linkable items are replaced. The subtle change involves the portion of the regex string which used to use the \w+ to identify a word, or in this case, the top level domain. It was incorrectly recognizing phrases like "a.k.a." as domain names. My change (above) forces it to fins a top level domain with at least 2 characters.
|
|
|
|
|
|
A problem occurs if the same link is repeated in the source text. Ideally, when gathering the regex collection you only want unique matches, rather than all matches. Can regex handle this automatically, or must I write code to account for this myself?
thanks
Fred
|
|
|
|
|
When you say there is a "problem", do you mean that it links all occurrences of the link that is duplicated? If so, that is by design. If you want to only link the first occurrence, I believe regex can do that, or you could sort the matchcollection object and skip duplicates.
|
|
|
|
|
No, I mean a problem:
suppose you have
www.google.com repeated in your text - ie it occurs twice.
When looping thorugh the tags regex it comes across the first match and you get
<a href="http://www.google.com">http://www.google.com</a>
in both cases as expected. Then it comes across the second match and you get
<a href="http://<a href="http://www.google.com">http://www.google.com</a>">http://<a href="http://www.google.com">http://www.google.com</a></a>
repeated twice... (because it's replacing the link text again)
Fred
|
|
|
|
|
Replace the appropriate line where the replacing occurs with the following one:
final = final.Replace(theMatches[index].ToString(), "<a href=\"" + section.Replace(".", "[pk:period]") + "\" " + param + ">" + theMatches[index].ToString().Replace(".", "[pk:period]") + "</a>");
By swapping out the periods, it negates any further matching until the function completes, where it swaps them back to periods. It's the same technique used to protect <nolink> regions.
|
|
|
|
|
yes - nice one. Thanks
Fred
|
|
|
|
|
No problem. I believe that the \k escape sequence can be used to ensure unique results as well, when used in conjunction with a +. My personal preference is to let it link everything. It helps the user, who may have scrolled down the page, find a link when the first occurrence is at the top, and it also helps the writer to be mindful of repeating themselves in their copy.
I hope the code helps you out. It's been very helpful to me. I had a much more mature version but decided to recode it just before initially posting it here, to make it faster and to allow <nolink> regions to span multiple lines. That's why there have been a few bugs, I believe. I use it on my site, so if new ones crop up, I tend to fix them quickly. So check there for the latest version, or to contact me directly.
|
|
|
|
|
Might want to use IndexOfAny instead of InStr - as InStr is not included in the .net framework.
// Make email addresses mailto links
if(final.IndexOfAny("@".ToCharArray()) > 0)
{
|
|
|
|
|
Yes, I neglected to include the InStr method that it uses. It is below:
<br />
public static int InStr(int Start, string String1, string String2)<br />
{<br />
if(Start > 0 && String1.Length >= Start)<br />
{<br />
int retValue = String1.IndexOf(String2, Start - 1) + 1;<br />
return retValue;<br />
}<br />
else<br />
{<br />
return -1;<br />
}<br />
}<br />
<br />
public static int InStr(string String1, string String2)<br />
{<br />
return InStr(1, String1, String2);<br />
}<br />
|
|
|
|
|