130 lines
5.0 KiB
C#
130 lines
5.0 KiB
C#
|
using System;
|
|||
|
using System.Collections.Generic;
|
|||
|
using System.Linq;
|
|||
|
using System.Text;
|
|||
|
|
|||
|
namespace HtmlAgilityPack
|
|||
|
{
|
|||
|
/// <summary>
/// Extension methods for running XPath queries against <see cref="HtmlNode"/> and
/// <see cref="HtmlDocument"/> instances and reading a value from the first match.
/// </summary>
/// <remarks>
/// Every overload that takes an <see cref="HtmlNode"/> re-parses the node's
/// <c>OuterHtml</c> into a brand-new <see cref="HtmlDocument"/> and evaluates the XPath
/// against that copy. Nodes returned by those overloads therefore belong to the copy,
/// not to the tree the original node lives in.
/// NOTE(review): presumably this is done so that absolute expressions such as
/// <c>//a</c> are scoped to the node instead of its whole owner document - confirm.
/// </remarks>
public static class HtmlVarExtension
{
    /// <summary>
    /// Reads a value from the first node matching <paramref name="xpath"/> inside a
    /// document rebuilt from <paramref name="original"/>'s outer HTML.
    /// </summary>
    /// <param name="original">Node to search in; when null, <paramref name="defValue"/> is returned.</param>
    /// <param name="xpath">XPath expression evaluated against the rebuilt document.</param>
    /// <param name="attr">
    /// Attribute name. Null or empty returns the HTML-decoded InnerText,
    /// "-1" returns InnerHtml, "-2" returns OuterHtml.
    /// </param>
    /// <param name="defValue">Value returned when nothing matches or the attribute lookup falls back.</param>
    /// <returns>The selected value, or <paramref name="defValue"/>.</returns>
    public static string GetAttr(this HtmlNode original, string xpath, string attr, string defValue)
    {
        if (original == null)
        {
            return defValue;
        }

        return GetAttr(GetHtmlDocument(original), xpath, attr, defValue);
    }

    /// <summary>
    /// Builds a new <see cref="HtmlDocument"/> from the outer HTML of the given node.
    /// </summary>
    /// <param name="original">Node whose outer HTML is parsed.</param>
    /// <returns>The new document, or null when <paramref name="original"/> is null.</returns>
    public static HtmlDocument GetHtmlDocument(this HtmlNode original)
    {
        if (original == null)
        {
            return null;
        }

        HtmlDocument copy = new HtmlDocument();
        copy.LoadHtml(original.OuterHtml);
        return copy;
    }

    /// <summary>
    /// Reads a value from the first node in the document matching <paramref name="xpath"/>.
    /// </summary>
    /// <param name="htmlItem">Document to search; when null, <paramref name="defValue"/> is returned.</param>
    /// <param name="xpath">XPath expression evaluated from the document node.</param>
    /// <param name="attr">
    /// Attribute name. Null or empty returns the HTML-decoded InnerText,
    /// "-1" returns InnerHtml, "-2" returns OuterHtml.
    /// </param>
    /// <param name="defValue">Value returned when nothing matches or the attribute lookup falls back.</param>
    /// <returns>The selected value, or <paramref name="defValue"/>.</returns>
    public static string GetAttr(this HtmlDocument htmlItem, string xpath, string attr, string defValue)
    {
        HtmlNode match = GetHtmlNode(htmlItem, xpath);
        if (match == null)
        {
            return defValue;
        }

        return ReadValue(match, attr, defValue);
    }

    /// <summary>
    /// Finds the first node in the document matching <paramref name="xpath"/>.
    /// </summary>
    /// <param name="htmlDoc">Document to search.</param>
    /// <param name="xpath">XPath expression evaluated from the document node.</param>
    /// <returns>The first match, or null when the document is null or nothing matches.</returns>
    public static HtmlNode GetHtmlNode(this HtmlDocument htmlDoc, string xpath)
    {
        if (htmlDoc == null)
        {
            return null;
        }

        HtmlNodeCollection matches = htmlDoc.DocumentNode.SelectNodes(xpath);

        // Guard against both "no result" shapes (null and an empty collection)
        // before indexing the first element.
        if (matches == null || matches.Count == 0)
        {
            return null;
        }

        return matches[0];
    }

    /// <summary>
    /// Finds the first node matching <paramref name="xpath"/> inside a document rebuilt
    /// from <paramref name="node"/>'s outer HTML.
    /// </summary>
    /// <param name="node">Node to search in.</param>
    /// <param name="xpath">XPath expression evaluated against the rebuilt document.</param>
    /// <returns>
    /// The first match (a node of the rebuilt copy), or null when <paramref name="node"/>
    /// is null or nothing matches.
    /// </returns>
    public static HtmlNode GetHtmlNode(this HtmlNode node, string xpath)
    {
        if (node == null)
        {
            return null;
        }

        return GetHtmlNode(GetHtmlDocument(node), xpath);
    }

    /// <summary>
    /// Finds all nodes matching <paramref name="xpath"/> inside a document rebuilt from
    /// <paramref name="node"/>'s outer HTML.
    /// </summary>
    /// <param name="node">Node to search in.</param>
    /// <param name="xpath">XPath expression evaluated against the rebuilt document.</param>
    /// <returns>
    /// Whatever <c>SelectNodes</c> returns for the rebuilt document, passed through
    /// unchanged (callers should be prepared for null), or null when
    /// <paramref name="node"/> is null.
    /// </returns>
    public static HtmlNodeCollection GetHtmlNodes(this HtmlNode node, string xpath)
    {
        if (node == null)
        {
            return null;
        }

        HtmlDocument copy = GetHtmlDocument(node);
        return copy.DocumentNode.SelectNodes(xpath);
    }

    /// <summary>
    /// Picks the requested piece of <paramref name="node"/> according to the
    /// <paramref name="attr"/> selector shared by both GetAttr overloads.
    /// </summary>
    private static string ReadValue(HtmlNode node, string attr, string defValue)
    {
        // A null selector is treated like an empty one instead of throwing.
        if (string.IsNullOrEmpty(attr))
        {
            return System.Web.HttpUtility.HtmlDecode(node.InnerText);
        }

        if (attr == "-1")
        {
            return node.InnerHtml;
        }

        if (attr == "-2")
        {
            return node.OuterHtml;
        }

        return node.GetAttributeValue(attr, defValue);
    }
}
|
|||
|
}
|