<a href="about.html">
<a href="http|https://www.xyz.com">
// Downloads the page whose address is typed into textBox1, extracts the href
// value of every <a> element, and binds the results to the form's list
// controls:
//   Hrefs         <- every href that is not the entered page address itself
//   ExternalLinks <- the absolute-URL portion of those hrefs (blank entries removed)
//   AllLinks      <- receives the entered address once, if any href was kept
//
// May throw WebException (download failure) or ArgumentException /
// UriFormatException-style errors from WebClient when the address is invalid;
// these are not handled here and propagate to the caller.
RegexOptions options = RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace;

List<string> externalLinks = new List<string>();
List<string> hrefAttributes = new List<string>();
List<string> keptHrefs = new List<string>();
bool anyKept = false;

string pageUrl = textBox1.Text;

// WebClient is IDisposable; release it as soon as the page has been fetched.
string html;
using (WebClient web = new WebClient())
{
    html = web.DownloadString(pageUrl);
}

// Variants of the entered address with its first 11, 12 and 7 characters
// removed (presumably the lengths of "http://www.", "https://www." and
// "http://" - confirm intent). A null entry means the address is too short
// for that cut; null never equals a regex capture, so it excludes nothing.
// The unguarded Remove(0, n) calls threw ArgumentOutOfRangeException here.
string withoutFirst11 = pageUrl.Length >= 11 ? pageUrl.Substring(11) : null;
string withoutFirst12 = pageUrl.Length >= 12 ? pageUrl.Substring(12) : null;
string withoutFirst7 = pageUrl.Length >= 7 ? pageUrl.Substring(7) : null;

// Group "href" (index 1) captures the attribute value; group "fileName"
// captures the anchor's inner text.
MatchCollection anchors = Regex.Matches(
    html,
    @"<a[^>]*?href[\s]?=[\s\""\']+(?<href>.*?)[\""\']+.*?>(?<fileName>[^<]+|.*?)?<\/a>",
    options);

foreach (Match anchor in anchors)
{
    string href = anchor.Groups["href"].Value;

    // Collected only for the disabled AllLinks.DataSource binding below.
    Match hrefAttribute = Regex.Match(
        href,
        "\\s*(?i)href\\s*=\\s*(\"([^\"]*\")|'[^']*'|([^'\">\\s]+))",
        options);
    hrefAttributes.Add(hrefAttribute.Groups[1].Value);

    // Absolute-URL portion of the href; empty when the href contains none
    // (for example a relative link such as "about.html").
    Match absoluteUrl = Regex.Match(
        href,
        @"((www\.|(http|https|ftp|news|file)+\:\/\/)[_.a-z0-9-]+\.[a-z0-9\/_:@=.+?,##%&~-]*[^.|\'|\# |!|\(|?|,| |>|<|;|\)])",
        options);
    string url = absoluteUrl.Groups[1].Value;

    // Skip links that point back at the entered address in any of its forms.
    if (url != pageUrl && url != withoutFirst11 && url != withoutFirst12 && url != withoutFirst7)
    {
        keptHrefs.Add(href);
        externalLinks.Add(url);
        anyKept = true;
    }
}

if (anyKept)
{
    AllLinks.Items.Add(pageUrl);
}

Hrefs.DataSource = keptHrefs;
//AllLinks.DataSource = hrefAttributes;

// Hrefs without an absolute URL contributed empty strings; drop them.
externalLinks.RemoveAll(string.IsNullOrWhiteSpace);
ExternalLinks.DataSource = externalLinks;
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)