using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace HtmlAgilityPack
{
    /// <summary>
    /// Convenience extension methods for querying <see cref="HtmlNode"/> and
    /// <see cref="HtmlDocument"/> instances with XPath.
    /// </summary>
    /// <remarks>
    /// The <see cref="HtmlNode"/> overloads do not query the node in place: they parse the
    /// node's <c>OuterHtml</c> into a new <see cref="HtmlDocument"/> and run the XPath against
    /// that copy. Nodes returned by those overloads therefore belong to the copy, not to the
    /// document the original node came from.
    /// </remarks>
    public static class HtmlVarExtension
    {
        /// <summary>
        /// Reads a value from the first node matching <paramref name="xpath"/> inside a
        /// re-parsed copy of <paramref name="original"/>.
        /// </summary>
        /// <param name="original">Node to search in; when null, <paramref name="defValue"/> is returned.</param>
        /// <param name="xpath">XPath expression evaluated against the re-parsed copy of the node.</param>
        /// <param name="attr">
        /// Attribute name to read. Null or empty returns the HTML-decoded InnerText,
        /// "-1" returns InnerHtml, "-2" returns OuterHtml.
        /// </param>
        /// <param name="defValue">Value returned when nothing matches; also passed as the default to <c>GetAttributeValue</c>.</param>
        /// <returns>The selected value, or <paramref name="defValue"/>.</returns>
        public static string GetAttr(this HtmlNode original, string xpath, string attr, string defValue)
        {
            if (original == null)
            {
                return defValue;
            }

            // Same lookup as the HtmlDocument overload, run against a copy of this node.
            return GetAttr(GetHtmlDocument(original), xpath, attr, defValue);
        }

        /// <summary>
        /// Builds a new <see cref="HtmlDocument"/> by parsing the OuterHtml of the given node.
        /// </summary>
        /// <param name="original">Node whose markup is parsed.</param>
        /// <returns>The new document, or null when <paramref name="original"/> is null.</returns>
        public static HtmlDocument GetHtmlDocument(this HtmlNode original)
        {
            if (original == null)
            {
                return null;
            }

            HtmlDocument copy = new HtmlDocument();
            copy.LoadHtml(original.OuterHtml);
            return copy;
        }

        /// <summary>
        /// Reads a value from the first node in <paramref name="htmlItem"/> matching
        /// <paramref name="xpath"/>.
        /// </summary>
        /// <param name="htmlItem">Document to search; when null, <paramref name="defValue"/> is returned.</param>
        /// <param name="xpath">XPath expression evaluated from the document node.</param>
        /// <param name="attr">
        /// Attribute name to read. Null or empty returns the HTML-decoded InnerText,
        /// "-1" returns InnerHtml, "-2" returns OuterHtml.
        /// </param>
        /// <param name="defValue">Value returned when nothing matches; also passed as the default to <c>GetAttributeValue</c>.</param>
        /// <returns>The selected value, or <paramref name="defValue"/>.</returns>
        public static string GetAttr(this HtmlDocument htmlItem, string xpath, string attr, string defValue)
        {
            HtmlNode first = SelectFirst(htmlItem, xpath);
            if (first == null)
            {
                return defValue;
            }

            return ReadValue(first, attr, defValue);
        }

        /// <summary>
        /// Finds the first node in the document matching <paramref name="xpath"/>.
        /// </summary>
        /// <param name="htmlDoc">Document to search; a null document yields null.</param>
        /// <param name="xpath">XPath expression evaluated from the document node.</param>
        /// <returns>The first matching node, or null when there is none.</returns>
        public static HtmlNode GetHtmlNode(this HtmlDocument htmlDoc, string xpath)
        {
            return SelectFirst(htmlDoc, xpath);
        }

        /// <summary>
        /// Finds the first node matching <paramref name="xpath"/> inside a re-parsed copy of
        /// <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node to search in; a null node yields null.</param>
        /// <param name="xpath">XPath expression evaluated against the re-parsed copy of the node.</param>
        /// <returns>The first matching node (owned by the copy), or null when there is none.</returns>
        public static HtmlNode GetHtmlNode(this HtmlNode node, string xpath)
        {
            // GetHtmlDocument returns null for a null node and SelectFirst tolerates that.
            return SelectFirst(GetHtmlDocument(node), xpath);
        }

        /// <summary>
        /// Finds all nodes matching <paramref name="xpath"/> inside a re-parsed copy of
        /// <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node to search in; a null node yields null.</param>
        /// <param name="xpath">XPath expression evaluated against the re-parsed copy of the node.</param>
        /// <returns>
        /// The result of <c>SelectNodes</c> on the copy, returned unchanged. Callers should be
        /// prepared for null, which the sibling methods in this class also guard against.
        /// </returns>
        public static HtmlNodeCollection GetHtmlNodes(this HtmlNode node, string xpath)
        {
            HtmlDocument copy = GetHtmlDocument(node);
            if (copy == null)
            {
                return null;
            }

            return copy.DocumentNode.SelectNodes(xpath);
        }

        /// <summary>
        /// Returns the first node matching the XPath in the document, or null when the
        /// document is null or nothing matches.
        /// </summary>
        private static HtmlNode SelectFirst(HtmlDocument document, string xpath)
        {
            if (document == null)
            {
                return null;
            }

            HtmlNodeCollection matches = document.DocumentNode.SelectNodes(xpath);

            // Require at least one element before indexing; a null check alone does not
            // protect against an empty collection.
            if (matches == null || matches.Count == 0)
            {
                return null;
            }

            return matches[0];
        }

        /// <summary>
        /// Picks the value selected by the attr convention: null/empty = decoded InnerText,
        /// "-1" = InnerHtml, "-2" = OuterHtml, anything else = that attribute's value.
        /// </summary>
        private static string ReadValue(HtmlNode node, string attr, string defValue)
        {
            // A null attr is treated like an empty one instead of throwing on attr.Length.
            if (string.IsNullOrEmpty(attr))
            {
                return System.Web.HttpUtility.HtmlDecode(node.InnerText);
            }

            if (attr == "-1")
            {
                return node.InnerHtml;
            }

            if (attr == "-2")
            {
                return node.OuterHtml;
            }

            return node.GetAttributeValue(attr, defValue);
        }
    }
}