通过 C# 实现对目标网页内容的抓取,并将抓取结果直接写入 TXT 文档。这样的操作在工作中可以节省很多时间。想尝试用代码的方式解决这类问题吗?一起来学习试试看吧。
/// <summary>
/// Console scraper: downloads a chapter index page, follows every chapter link
/// found under the element with id <c>list</c>, and appends each chapter's title
/// and text to <c>xs.txt</c> in the application base directory.
/// </summary>
public class Program
{
    // Directory the application runs from; the output file is created here.
    private static readonly string baseUrl = System.AppDomain.CurrentDomain.SetupInformation.ApplicationBase;

    // Site root, used as the base address of the single shared RestClient.
    private const string SiteRootUrl = "https://www.biduoxs.com/";

    // Chapter index page; chapter hrefs are resolved against this address.
    private const string IndexUrl = "https://www.biduoxs.com/biquge/17_17005/";

    // Name of the output file inside the application base directory.
    private const string OutputFileName = "xs.txt";

    /// <summary>
    /// Program entry point. Runs the scrape to completion.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static void Main(string[] args)
    {
        // Block exactly once at the top level. GetAwaiter().GetResult() rethrows
        // the original exception instead of wrapping it in AggregateException.
        RunAsync().GetAwaiter().GetResult();
    }

    /// <summary>
    /// Downloads the index page, then each linked chapter, appending titles and
    /// chapter text to the output file. Chapters that fail to download or have
    /// no <c>content</c> element are reported on stderr and skipped.
    /// </summary>
    private static async Task RunAsync()
    {
        // One client for the whole run; creating a client per request wastes connections.
        var client = new RestClient(SiteRootUrl);
        var indexUri = new Uri(IndexUrl);

        string indexHtml = await DownloadAsync(client, indexUri);
        if (indexHtml == null)
        {
            return;
        }

        var indexDoc = new HtmlAgilityPack.HtmlDocument();
        indexDoc.LoadHtml(indexHtml);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var chapterLinks = indexDoc.DocumentNode.SelectNodes("//*[@id=\"list\"]/dl/dd/a");
        if (chapterLinks == null)
        {
            Console.Error.WriteLine("No chapter links found on " + indexUri);
            return;
        }

        string outputPath = System.IO.Path.Combine(baseUrl ?? string.Empty, OutputFileName);

        // Open the file once in append mode instead of reopening it for every write.
        using (var writer = new System.IO.StreamWriter(outputPath, true))
        {
            foreach (var link in chapterLinks)
            {
                Console.WriteLine(link.InnerText);
                await writer.WriteLineAsync(link.InnerText);

                var hrefAttribute = link.Attributes["href"];
                Uri chapterUri;
                if (hrefAttribute == null
                    || !Uri.TryCreate(indexUri, hrefAttribute.Value, out chapterUri))
                {
                    Console.Error.WriteLine("Skipping chapter without a usable href: " + link.InnerText);
                    continue;
                }

                string chapterHtml = await DownloadAsync(client, chapterUri);
                if (chapterHtml == null)
                {
                    continue;
                }

                var chapterDoc = new HtmlAgilityPack.HtmlDocument();
                chapterDoc.LoadHtml(chapterHtml);

                var contentNode = chapterDoc.GetElementbyId("content");
                if (contentNode == null)
                {
                    Console.Error.WriteLine("No element with id 'content' on " + chapterUri);
                    continue;
                }

                // End each chapter with a line break so the next title starts on its own line.
                await writer.WriteLineAsync(HtmlToText(contentNode.InnerHtml));
            }
        }
    }

    /// <summary>
    /// Fetches one page and returns its body.
    /// </summary>
    /// <param name="client">Shared HTTP client.</param>
    /// <param name="pageUri">Absolute address of the page to fetch.</param>
    /// <returns>The response body, or <c>null</c> when the request failed or the body was empty.</returns>
    private static async Task<string> DownloadAsync(RestClient client, Uri pageUri)
    {
        var response = await client.ExecuteAsync(new RestRequest(pageUri.AbsoluteUri));
        if (!response.IsSuccessful || string.IsNullOrEmpty(response.Content))
        {
            Console.Error.WriteLine(
                "Request failed for " + pageUri + ": " + response.StatusCode + " " + response.ErrorMessage);
            return null;
        }

        return response.Content;
    }

    /// <summary>
    /// Converts chapter markup to plain text: line-break tags become the platform
    /// line terminator and non-breaking spaces become ordinary spaces.
    /// </summary>
    /// <param name="html">Inner HTML of the chapter content element.</param>
    /// <returns>The text with break tags and non-breaking spaces replaced.</returns>
    private static string HtmlToText(string html)
    {
        string newLine = Environment.NewLine;

        // NOTE(review): the original replaced what displays as " " with " ", a no-op;
        // presumably the first literal was the "&nbsp;" entity (or a raw U+00A0).
        // Both forms are handled here - confirm against real page content.
        return html
            .Replace("<br />", newLine)
            .Replace("<br/>", newLine)
            .Replace("<br>", newLine)
            .Replace("&nbsp;", " ")
            .Replace('\u00A0', ' ');
    }
}
免责声明:文中图文均来自网络,如有侵权请联系删除。
欧普软件发布此文仅为传递信息,不代表欧普软件认同其观点或证实其描述。