// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manipulators.
// Website & Documentation: http://html-agility-pack.net
// Forum & Issues: https://github.com/zzzprojects/html-agility-pack
// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE
// More projects: http://www.zzzprojects.com/
// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved.
#if !METRO && !NETSTANDARD1_3
using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.Xml.XPath;
namespace HtmlAgilityPack
{
public partial class HtmlNode
{
/// <summary>
/// Creates an instance of <typeparamref name="T"/> and fills its XPath-annotated properties,
/// reading from the document that owns this node.
/// </summary>
/// <typeparam name="T">Type of object to fill. It must be marked with <see cref="HasXPathAttribute"/>,
/// have a parameterless constructor and at least one property marked with <see cref="XPathAttribute"/>.</typeparam>
/// <returns>A new <typeparamref name="T"/> holding the encapsulated data.</returns>
/// <exception cref="MissingMethodException">T has no parameterless constructor.</exception>
/// <exception cref="MissingXPathException">T is not marked with HasXPath, or none of its properties is marked with XPath.</exception>
/// <exception cref="System.Xml.XPath.XPathException">An XPath expression on a property is not valid.</exception>
/// <exception cref="NodeNotFoundException">No node matches a property's XPath and the property is not marked with <see cref="SkipNodeNotFoundAttribute"/>.</exception>
/// <exception cref="NodeAttributeNotFoundException">The requested HTML attribute is missing on a matched node.</exception>
/// <exception cref="FormatException">A selected string cannot be converted to the property type.</exception>
/// <exception cref="ArgumentException">A collection property does not have exactly one generic argument.</exception>
/// <exception cref="Exception">Any other conversion failure, or a collection property that ended up empty.</exception>
public T GetEncapsulatedData<T>()
{
    // Passing null makes the Type-based overload fall back to OwnerDocument.
    return (T)GetEncapsulatedData(typeof(T), null);
}
/// <summary>
/// Creates an instance of <typeparamref name="T"/> and fills its XPath-annotated properties,
/// reading from <paramref name="htmlDocument"/>.
/// </summary>
/// <typeparam name="T">Type of object to fill. It must be marked with <see cref="HasXPathAttribute"/>,
/// have a parameterless constructor and at least one property marked with <see cref="XPathAttribute"/>.</typeparam>
/// <param name="htmlDocument">Document to read from. When null, the document that owns this node is used.</param>
/// <returns>A new <typeparamref name="T"/> holding the encapsulated data.</returns>
/// <exception cref="MissingMethodException">T has no parameterless constructor.</exception>
/// <exception cref="MissingXPathException">T is not marked with HasXPath, or none of its properties is marked with XPath.</exception>
/// <exception cref="System.Xml.XPath.XPathException">An XPath expression on a property is not valid.</exception>
/// <exception cref="NodeNotFoundException">No node matches a property's XPath and the property is not marked with <see cref="SkipNodeNotFoundAttribute"/>.</exception>
/// <exception cref="NodeAttributeNotFoundException">The requested HTML attribute is missing on a matched node.</exception>
/// <exception cref="FormatException">A selected string cannot be converted to the property type.</exception>
/// <exception cref="ArgumentException">A collection property does not have exactly one generic argument.</exception>
/// <exception cref="Exception">Any other conversion failure, or a collection property that ended up empty.</exception>
public T GetEncapsulatedData<T>(HtmlDocument htmlDocument)
{
    return (T)GetEncapsulatedData(typeof(T), htmlDocument);
}
/// <summary>
/// Creates an instance of <paramref name="targetType"/> and fills every property marked with
/// <see cref="XPathAttribute"/>, recursing into property types that are themselves marked with
/// <see cref="HasXPathAttribute"/>.
/// </summary>
/// <param name="targetType">Type of object to fill. It must be marked with <see cref="HasXPathAttribute"/>,
/// have a parameterless constructor and at least one property marked with <see cref="XPathAttribute"/>.</param>
/// <param name="htmlDocument">Document to read from. When null, the document that owns this node is used.</param>
/// <returns>A new object of type <paramref name="targetType"/> holding the encapsulated data.</returns>
/// <exception cref="ArgumentNullException"><paramref name="targetType"/> is null.</exception>
/// <exception cref="MissingMethodException">The type has no parameterless constructor.</exception>
/// <exception cref="MissingXPathException">The type is not marked with HasXPath, or none of its properties is marked with XPath.</exception>
/// <exception cref="XPathException">An XPath expression on a property is not valid.</exception>
/// <exception cref="NodeNotFoundException">No node matches a property's XPath and the property is not marked with <see cref="SkipNodeNotFoundAttribute"/>.</exception>
/// <exception cref="NodeAttributeNotFoundException">The requested HTML attribute is missing on a matched node.</exception>
/// <exception cref="FormatException">A selected string cannot be converted to the property (or element) type.</exception>
/// <exception cref="ArgumentException">A collection property does not have exactly one generic argument.</exception>
/// <exception cref="Exception">Any other conversion failure, or a collection property that ended up empty.</exception>
public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = null)
{
    if (targetType == null)
    {
        throw new ArgumentNullException(nameof(targetType), "Parameter targetType is null");
    }

    // Fall back to the document that owns this node when the caller supplies none.
    HtmlDocument source = htmlDocument ?? OwnerDocument;

    if (targetType.IsInstantiable() == false)
    {
        throw new MissingMethodException("Parameterless Constructor excpected for " + targetType.FullName);
    }

    // The instance is created before the HasXPath check, exactly as before, so constructor side effects are unchanged.
    object targetObject = Activator.CreateInstance(targetType);

    if (targetType.IsDefinedAttribute(typeof(HasXPathAttribute)) == false)
    {
        throw new MissingXPathException("Type T must define HasXPath attribute and include properties with XPath attribute.");
    }

    // Materialize once: the filtered sequence is lazy and would otherwise be evaluated for the count and again for the loop.
    List<PropertyInfo> validProperties = new List<PropertyInfo>(targetType.GetPropertiesDefinedXPath());
    if (validProperties.Count == 0)
    {
        throw new MissingXPathException("Type " + targetType.FullName +
            " defined HasXPath Attribute but it does not have any property with XPath Attribte.");
    }

    foreach (PropertyInfo propertyInfo in validProperties)
    {
        // Indexing the attribute array keeps this working on old framework versions.
        XPathAttribute xPathAttribute = (propertyInfo.GetCustomAttributes(typeof(XPathAttribute), false) as IList)[0] as XPathAttribute;

        if (propertyInfo.IsIEnumerable() == false)
        {
            FillSingleValueProperty(targetObject, propertyInfo, xPathAttribute, source);
        }
        else
        {
            FillEnumerableProperty(targetObject, propertyInfo, xPathAttribute, source);
        }
    }

    return targetObject;
}

/// <summary>
/// Fills one non-collection property from the first node matching its XPath.
/// </summary>
private void FillSingleValueProperty(object targetObject, PropertyInfo propertyInfo, XPathAttribute xPathAttribute, HtmlDocument source)
{
    HtmlNode htmlNode;
    try
    {
        htmlNode = source.DocumentNode.SelectSingleNode(xPathAttribute.XPath);
    }
    catch (XPathException ex)
    {
        throw new XPathException(ex.Message + " That means you have a syntax error in XPath property of this Property : " +
            propertyInfo.PropertyType.FullName + " " + propertyInfo.Name, ex);
    }
    catch (Exception ex)
    {
        throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " +
            propertyInfo.PropertyType.FullName + " " + propertyInfo.Name, ex);
    }

    if (htmlNode == null)
    {
        if (propertyInfo.IsDefined(typeof(SkipNodeNotFoundAttribute), false))
        {
            // The property opted out: leave it at its default value.
            return;
        }

        throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " +
            propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
    }

    // Nested HasXPath type: parse the node's inner HTML as its own document and recurse.
    if (propertyInfo.PropertyType.IsDefinedAttribute(typeof(HasXPathAttribute)))
    {
        HtmlDocument innerHtmlDocument = new HtmlDocument();
        innerHtmlDocument.LoadHtml(htmlNode.InnerHtml);
        object nested = GetEncapsulatedData(propertyInfo.PropertyType, innerHtmlDocument);
        propertyInfo.SetValue(targetObject, nested, null);
        return;
    }

    string result;
    if (xPathAttribute.AttributeName == null)
    {
        // No attribute name given: take the node content selected by NodeReturnType.
        result = Tools.GetNodeValueBasedOnXPathReturnType<string>(htmlNode, xPathAttribute);
    }
    else
    {
        result = htmlNode.GetAttributeValue(xPathAttribute.AttributeName, null);
    }

    if (result == null)
    {
        throw new NodeAttributeNotFoundException("Can not find " +
            xPathAttribute.AttributeName + " Attribute in " + htmlNode.Name +
            " related to " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
    }

    object converted = ConvertEncapsulatedValue(result, propertyInfo.PropertyType, propertyInfo);
    propertyInfo.SetValue(targetObject, converted, null);
}

/// <summary>
/// Fills one collection property with a list built from every node matching its XPath.
/// </summary>
private void FillEnumerableProperty(object targetObject, PropertyInfo propertyInfo, XPathAttribute xPathAttribute, HtmlDocument source)
{
    // Exactly one generic argument is required (so not something like a Dictionary).
    IList<Type> genericTypes = propertyInfo.GetGenericTypes() as IList<Type>;
    if (genericTypes == null || genericTypes.Count != 1)
    {
        throw new ArgumentException(propertyInfo.Name + " should have one generic argument.");
    }

    Type elementType = genericTypes[0];

    HtmlNodeCollection nodeCollection;
    try
    {
        nodeCollection = source.DocumentNode.SelectNodes(xPathAttribute.XPath);
    }
    catch (XPathException ex)
    {
        throw new XPathException(ex.Message + " That means you have a syntax error in XPath property of this Property : " +
            propertyInfo.PropertyType.FullName + " " + propertyInfo.Name, ex);
    }
    catch (Exception ex)
    {
        throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " +
            propertyInfo.PropertyType.FullName + " " + propertyInfo.Name, ex);
    }

    if (nodeCollection == null || nodeCollection.Count == 0)
    {
        if (propertyInfo.IsDefined(typeof(SkipNodeNotFoundAttribute), false))
        {
            // The property opted out: leave it at its default value.
            return;
        }

        throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " +
            propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
    }

    IList result = elementType.CreateIListOfType();

    if (elementType.IsDefinedAttribute(typeof(HasXPathAttribute)))
    {
        // Element type is itself HasXPath: recurse once per matched node.
        foreach (HtmlNode node in nodeCollection)
        {
            HtmlDocument innerHtmlDocument = new HtmlDocument();
            innerHtmlDocument.LoadHtml(node.InnerHtml);
            result.Add(GetEncapsulatedData(elementType, innerHtmlDocument));
        }
    }
    else if (xPathAttribute.AttributeName == null)
    {
        // No attribute name given: convert each node's content selected by NodeReturnType.
        try
        {
            result = Tools.GetNodesValuesBasedOnXPathReturnType(nodeCollection, xPathAttribute, elementType);
        }
        catch (FormatException ex)
        {
            throw new FormatException("Can not convert Invalid string in node collection to " + elementType.FullName + " " + propertyInfo.Name, ex);
        }
        catch (Exception ex)
        {
            throw new Exception("Unhandled Exception : " + ex.Message, ex);
        }
    }
    else
    {
        foreach (HtmlNode node in nodeCollection)
        {
            string nodeAttributeValue = node.GetAttributeValue(xPathAttribute.AttributeName, null);
            if (nodeAttributeValue == null)
            {
                throw new NodeAttributeNotFoundException("Can not find " + xPathAttribute.AttributeName + " Attribute in " + node.Name + " related to " +
                    propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
            }

            result.Add(ConvertEncapsulatedValue(nodeAttributeValue, elementType, propertyInfo));
        }
    }

    if (result == null || result.Count == 0)
    {
        throw new Exception("Cannot fill " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name + " because it is null.");
    }

    propertyInfo.SetValue(targetObject, result, null);
}

/// <summary>
/// Converts a selected string to <paramref name="conversionType"/>, keeping the original failure as the inner exception.
/// </summary>
private static object ConvertEncapsulatedValue(string value, Type conversionType, PropertyInfo propertyInfo)
{
    try
    {
        return Convert.ChangeType(value, conversionType);
    }
    catch (FormatException ex)
    {
        throw new FormatException("Can not convert Invalid string to " + conversionType.FullName + " " + propertyInfo.Name, ex);
    }
    catch (Exception ex)
    {
        throw new Exception("Unhandled Exception : " + ex.Message, ex);
    }
}
}
/// <summary>
/// Helper methods used by the GetEncapsulatedData methods.
/// </summary>
internal static class Tools
{
/// <summary>
/// Determines whether a type is decorated with a given attribute, on both the full framework and .NET Standard builds.
/// </summary>
/// <param name="type">Type to inspect.</param>
/// <param name="attributeType">Attribute type to look for.</param>
/// <returns>True when <paramref name="type"/> defines <paramref name="attributeType"/>; otherwise false.</returns>
/// <exception cref="ArgumentNullException"><paramref name="type"/> or <paramref name="attributeType"/> is null.</exception>
internal static bool IsDefinedAttribute(this Type type, Type attributeType)
{
    if (type == null)
    {
        throw new ArgumentNullException(nameof(type), "Parameter type is null when checking type defined attributeType or not.");
    }
    if (attributeType == null)
    {
        throw new ArgumentNullException(nameof(attributeType), "Parameter attributeType is null when checking type defined attributeType or not.");
    }
    // Exactly one branch is compiled, so no fallback throw is needed after the directive.
#if NETSTANDARD1_3 || NETSTANDARD1_6
    return type.GetTypeInfo().IsDefined(attributeType);
#else
    return type.IsDefined(attributeType, false);
#endif
}
/// <summary>
/// Retrieves the properties of a type that are marked with <see cref="XPathAttribute"/>.
/// </summary>
/// <param name="type">Type whose XPath-marked properties are wanted.</param>
/// <returns>A lazily evaluated sequence of the matching property infos.</returns>
/// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
internal static IEnumerable<PropertyInfo> GetPropertiesDefinedXPath(this Type type)
{
    if (type == null)
    {
        throw new ArgumentNullException(nameof(type), "Parameter type is null while retrieving properties defined XPathAttribute of Type type.");
    }
#if NETSTANDARD1_3 || NETSTANDARD1_6
    PropertyInfo[] properties = type.GetTypeInfo().GetProperties();
#else
    PropertyInfo[] properties = type.GetProperties();
#endif
    return properties.HAPWhere(x => x.IsDefined(typeof(XPathAttribute), false) == true);
}
/// <summary>
/// Determines whether a property's type implements <see cref="IEnumerable"/>.
/// A <see cref="string"/> property is deliberately reported as NOT enumerable.
/// </summary>
/// <param name="propertyInfo">The property to test.</param>
/// <returns>True when the property type is assignable to <see cref="IEnumerable"/> and is not string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="propertyInfo"/> is null.</exception>
internal static bool IsIEnumerable(this PropertyInfo propertyInfo)
{
    if (propertyInfo == null)
    {
        throw new ArgumentNullException(nameof(propertyInfo), "Parameter propertyInfo is null while checking propertyInfo for being IEnumerable or not.");
    }
    // string is excluded explicitly so it is handled as a single value rather than a collection.
    if (propertyInfo.PropertyType == typeof(string))
    {
        return false;
    }
#if NETSTANDARD1_3 || NETSTANDARD1_6
    return typeof(IEnumerable).GetTypeInfo().IsAssignableFrom(propertyInfo.PropertyType);
#else
    return typeof(IEnumerable).IsAssignableFrom(propertyInfo.PropertyType);
#endif
}
/// <summary>
/// Returns the generic type arguments of a property's type.
/// </summary>
/// <param name="propertyInfo">Property whose type is inspected.</param>
/// <returns>The generic arguments of the property type, as returned by GetGenericArguments.</returns>
/// <exception cref="ArgumentNullException"><paramref name="propertyInfo"/> is null.</exception>
internal static IEnumerable<Type> GetGenericTypes(this PropertyInfo propertyInfo)
{
    if (propertyInfo == null)
    {
        throw new ArgumentNullException(nameof(propertyInfo), "Parameter propertyInfo is null while Getting generic types of Property.");
    }
#if NETSTANDARD1_3 || NETSTANDARD1_6
    return propertyInfo.PropertyType.GetTypeInfo().GetGenericArguments();
#else
    return propertyInfo.PropertyType.GetGenericArguments();
#endif
}
/// <summary>
/// Finds a method declared on a type by its name.
/// </summary>
/// <param name="type">Type that contains the requested method.</param>
/// <param name="methodName">Name of the requested method.</param>
/// <returns>The method info returned by the reflection lookup for <paramref name="methodName"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="type"/> is null, or <paramref name="methodName"/> is null or empty.</exception>
internal static MethodInfo GetMethodByItsName(this Type type, string methodName)
{
    if (type == null)
    {
        throw new ArgumentNullException(nameof(type), "Parameter type is null while Getting method from it.");
    }
    // An empty name keeps throwing ArgumentNullException so existing catch blocks still match.
    if (string.IsNullOrEmpty(methodName))
    {
        throw new ArgumentNullException(nameof(methodName), "Parameter methodName is null while Getting method from Type type.");
    }
#if NETSTANDARD1_3 || NETSTANDARD1_6
    return type.GetTypeInfo().GetMethod(methodName);
#else
    return type.GetMethod(methodName);
#endif
}
/// <summary>
/// Creates an empty <see cref="List{T}"/> whose element type is the given type.
/// </summary>
/// <param name="type">Element type of the list to create.</param>
/// <returns>The new list, exposed through the non-generic <see cref="IList"/> interface.</returns>
/// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
internal static IList CreateIListOfType(this Type type)
{
    if (type == null)
    {
        throw new ArgumentNullException(nameof(type), "Parameter type is null while creating List.");
    }
    // Close the open generic List<> over the runtime type, then instantiate it.
    Type constructedListType = typeof(List<>).MakeGenericType(type);
    return Activator.CreateInstance(constructedListType) as IList;
}
/// <summary>
/// Returns the part of an <see cref="HtmlNode"/> selected by the attribute's NodeReturnType, converted to <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type the selected value is converted to.</typeparam>
/// <param name="htmlNode">Node to read from.</param>
/// <param name="xPathAttribute">Attribute whose NodeReturnType chooses InnerHtml, InnerText or OuterHtml.</param>
/// <returns>The selected value converted to <typeparamref name="T"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlNode"/> or <paramref name="xPathAttribute"/> is null.</exception>
/// <exception cref="Exception">NodeReturnType is not one of the known <see cref="ReturnType"/> values.</exception>
internal static T GetNodeValueBasedOnXPathReturnType<T>(HtmlNode htmlNode, XPathAttribute xPathAttribute)
{
    if (htmlNode == null)
    {
        throw new ArgumentNullException(nameof(htmlNode), "parameter html node is null");
    }
    if (xPathAttribute == null)
    {
        throw new ArgumentNullException(nameof(xPathAttribute), "parameter xpathAttribute is null");
    }

    object rawValue;
    switch (xPathAttribute.NodeReturnType)
    {
        case ReturnType.InnerHtml:
            rawValue = htmlNode.InnerHtml;
            break;
        case ReturnType.InnerText:
            rawValue = htmlNode.InnerText;
            break;
        case ReturnType.OuterHtml:
            rawValue = htmlNode.OuterHtml;
            break;
        default:
            // Same exception type as before, now with a message naming the offending value.
            throw new Exception("Unsupported ReturnType value: " + xPathAttribute.NodeReturnType);
    }

    return (T)Convert.ChangeType(rawValue, typeof(T));
}
/// <summary>
/// Returns, for every node in a collection, the part selected by the attribute's NodeReturnType,
/// converted to <paramref name="listGenericType"/>.
/// </summary>
/// <param name="htmlNodeCollection">Nodes to read from. Must contain at least one node.</param>
/// <param name="xPathAttribute">Attribute whose NodeReturnType chooses InnerHtml, InnerText or OuterHtml.</param>
/// <param name="listGenericType">Element type of the returned list.</param>
/// <returns>A list of converted values; it is empty when NodeReturnType is not a known <see cref="ReturnType"/> value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlNodeCollection"/> is null or empty, or <paramref name="xPathAttribute"/> is null.</exception>
internal static IList GetNodesValuesBasedOnXPathReturnType(HtmlNodeCollection htmlNodeCollection, XPathAttribute xPathAttribute, Type listGenericType)
{
    // An empty collection keeps throwing ArgumentNullException so existing catch blocks still match.
    if (htmlNodeCollection == null || htmlNodeCollection.Count == 0)
    {
        throw new ArgumentNullException(nameof(htmlNodeCollection), "parameter htmlNodeCollection is null or empty.");
    }
    if (xPathAttribute == null)
    {
        throw new ArgumentNullException(nameof(xPathAttribute), "parameter xpathAttribute is null");
    }

    IList result = listGenericType.CreateIListOfType();
    foreach (HtmlNode node in htmlNodeCollection)
    {
        object rawValue;
        switch (xPathAttribute.NodeReturnType)
        {
            case ReturnType.InnerHtml:
                rawValue = node.InnerHtml;
                break;
            case ReturnType.InnerText:
                rawValue = node.InnerText;
                break;
            case ReturnType.OuterHtml:
                rawValue = node.OuterHtml;
                break;
            default:
                // Unknown return type: hand back the (still empty) list, as the original switch did.
                return result;
        }

        result.Add(Convert.ChangeType(rawValue, listGenericType));
    }

    return result;
}
/// <summary>
/// Stand-in for the Func delegate so lambda expressions can be used where the framework's own Func is not available.
/// </summary>
/// <typeparam name="T">Type of the argument.</typeparam>
/// <typeparam name="TResult">Type of the returned value.</typeparam>
/// <param name="arg">Argument passed to the delegate.</param>
/// <returns>The value produced by the delegate.</returns>
internal delegate TResult HAPFunc<T, TResult>(T arg);

/// <summary>
/// Filters a sequence with a predicate; works like the Where method in LINQ.
/// </summary>
/// <typeparam name="TSource">Element type of the sequence.</typeparam>
/// <param name="source">Sequence to filter.</param>
/// <param name="predicate">Test applied to each element.</param>
/// <returns>A lazily evaluated sequence of the elements for which <paramref name="predicate"/> returned true.</returns>
internal static IEnumerable<TSource> HAPWhere<TSource>(this IEnumerable<TSource> source, HAPFunc<TSource, bool> predicate)
{
    foreach (TSource item in source)
    {
        if (predicate(item))
        {
            yield return item;
        }
    }
}
/// <summary>
/// Checks whether a type has a parameterless constructor, which is what is required to instantiate it here.
/// </summary>
/// <param name="type">Type to check.</param>
/// <returns>True when a parameterless constructor is found; otherwise false.</returns>
/// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
internal static bool IsInstantiable(this Type type)
{
    if (type == null)
    {
        throw new ArgumentNullException(nameof(type), "type is null");
    }
#if NETSTANDARD1_3 || NETSTANDARD1_6
    // Any declared constructor without parameters counts on this platform.
    return type.GetTypeInfo().DeclaredConstructors.HAPWhere(x => x.GetParameters().Length == 0).CountOfIEnumerable() != 0;
#else
    return type.GetConstructor(Type.EmptyTypes) != null;
#endif
}
/// <summary>
/// Returns the number of elements in a sequence.
/// </summary>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <param name="source">Sequence to count.</param>
/// <returns>The number of elements in <paramref name="source"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
internal static int CountOfIEnumerable<T>(this IEnumerable<T> source)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source), "Parameter source is null while counting the IEnumerable");
    }

    // Collections already know their size; avoid walking them.
    ICollection<T> collection = source as ICollection<T>;
    if (collection != null)
    {
        return collection.Count;
    }

    int counter = 0;
    foreach (T item in source)
    {
        counter++;
    }
    return counter;
}
}
/// <summary>
/// Selects which part of a matched HTML node is returned.
/// </summary>
public enum ReturnType
{
    /// <summary>
    /// The text between the start and end tags of the node.
    /// </summary>
    InnerText = 0,

    /// <summary>
    /// The HTML between the start and end tags of the node.
    /// </summary>
    InnerHtml = 1,

    /// <summary>
    /// The node itself together with its content, as HTML.
    /// </summary>
    OuterHtml = 2
}
/// <summary>
/// Marker attribute: flags a class as having properties annotated with <see cref="XPathAttribute"/>.
/// </summary>
[AttributeUsage(AttributeTargets.Class, AllowMultiple = false, Inherited = false)]
public sealed class HasXPathAttribute : Attribute
{
    // Intentionally empty: only the presence of the attribute is checked.
}
/// <summary>
/// Binds a property to an XPath expression and states which part of the matched node
/// (or which of its HTML attributes) supplies the property value.
/// </summary>
[AttributeUsage(AttributeTargets.Property, AllowMultiple = false, Inherited = false)]
public sealed class XPathAttribute : Attribute
{
    /// <summary>
    /// XPath expression used to find the related html node.
    /// </summary>
    public string XPath { get; }

    /// <summary>
    /// Name of the HTML attribute to read from the matched node; null when the node content is wanted instead.
    /// </summary>
    public string AttributeName { get; set; }

    /// <summary>
    /// Which part of the matched node is returned.
    /// </summary>
    public ReturnType NodeReturnType { get; set; }

    /// <summary>
    /// Binds to the inner text of the node found by <paramref name="xpathString"/>.
    /// </summary>
    /// <param name="xpathString">XPath expression that locates the node.</param>
    public XPathAttribute(string xpathString)
        : this(xpathString, ReturnType.InnerText)
    {
    }

    /// <summary>
    /// Binds to the chosen part of the node found by <paramref name="xpathString"/>.
    /// </summary>
    /// <param name="xpathString">XPath expression that locates the node.</param>
    /// <param name="nodeReturnType">Part of the node to return.</param>
    public XPathAttribute(string xpathString, ReturnType nodeReturnType)
    {
        XPath = xpathString;
        NodeReturnType = nodeReturnType;
    }

    /// <summary>
    /// Binds to the value of an HTML attribute on the node found by <paramref name="xpathString"/>.
    /// </summary>
    /// <param name="xpathString">XPath expression that locates the node.</param>
    /// <param name="attributeName">Name of the HTML attribute whose value is wanted.</param>
    public XPathAttribute(string xpathString, string attributeName)
        : this(xpathString, ReturnType.InnerText)
    {
        AttributeName = attributeName;
    }
}
/// <summary>
/// Marker attribute: a property carrying it is left at its default value, instead of raising
/// <see cref="NodeNotFoundException"/>, when its XPath matches no node.
/// </summary>
[AttributeUsage(AttributeTargets.Property, AllowMultiple = false, Inherited = false)]
public sealed class SkipNodeNotFoundAttribute : Attribute
{
    // Intentionally empty: only the presence of the attribute is checked.
}
/// <summary>
/// Thrown when an XPath expression cannot be bound to any HTML node.
/// </summary>
public class NodeNotFoundException : Exception
{
    /// <summary>
    /// Creates the exception with a message and the exception that caused it.
    /// </summary>
    /// <param name="message">Description of the failure.</param>
    /// <param name="inner">Underlying exception.</param>
    public NodeNotFoundException(string message, Exception inner)
        : base(message, inner)
    {
    }

    /// <summary>
    /// Creates the exception with a message.
    /// </summary>
    /// <param name="message">Description of the failure.</param>
    public NodeNotFoundException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with the default message.
    /// </summary>
    public NodeNotFoundException()
    {
    }
}
/// <summary>
/// Thrown when an XPath binding asks for an HTML attribute that the matched node does not have.
/// </summary>
public class NodeAttributeNotFoundException : Exception
{
    /// <summary>
    /// Creates the exception with a message and the exception that caused it.
    /// </summary>
    /// <param name="message">Description of the failure.</param>
    /// <param name="inner">Underlying exception.</param>
    public NodeAttributeNotFoundException(string message, Exception inner)
        : base(message, inner)
    {
    }

    /// <summary>
    /// Creates the exception with a message.
    /// </summary>
    /// <param name="message">Description of the failure.</param>
    public NodeAttributeNotFoundException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with the default message.
    /// </summary>
    public NodeAttributeNotFoundException()
    {
    }
}
/// <summary>
/// Thrown when a class has no property annotated with an XPath, or lacks the HasXPath marker.
/// </summary>
public class MissingXPathException : Exception
{
    /// <summary>
    /// Creates the exception with a message and the exception that caused it.
    /// </summary>
    /// <param name="message">Description of the failure.</param>
    /// <param name="inner">Underlying exception.</param>
    public MissingXPathException(string message, Exception inner)
        : base(message, inner)
    {
    }

    /// <summary>
    /// Creates the exception with a message.
    /// </summary>
    /// <param name="message">Description of the failure.</param>
    public MissingXPathException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates the exception with the default message.
    /// </summary>
    public MissingXPathException()
    {
    }
}
}
#if FX20
namespace System.Runtime.CompilerServices
{
// Declared only when the FX20 symbol is defined (see the enclosing #if).
// NOTE(review): presumably a polyfill so the `this`-parameter extension methods in this file
// compile on a framework that does not ship its own ExtensionAttribute - confirm against the FX20 build.
// The type name, namespace and usage targets are left exactly as they were.
[AttributeUsage(AttributeTargets.Method |
AttributeTargets.Class | AttributeTargets.Assembly)]
public sealed class ExtensionAttribute : Attribute
{
}
}
#endif
#endif