// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manipulators.
// Website & Documentation: http://html-agility-pack.net
// Forum & Issues: https://github.com/zzzprojects/html-agility-pack
// License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE
// More projects: http://www.zzzprojects.com/
// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved.

#if !METRO && !NETSTANDARD1_3

using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.Xml.XPath;

namespace HtmlAgilityPack
{
    public partial class HtmlNode
    {
        /// <summary>
        /// Creates an instance of <typeparamref name="T"/> and fills its XPath-annotated properties
        /// (recursively for nested <see cref="HasXPathAttribute"/> types) from this node's owner document.
        /// </summary>
        /// <typeparam name="T">Type to fill. It must define <see cref="HasXPathAttribute"/> and have at least one property with <see cref="XPathAttribute"/>.</typeparam>
        /// <returns>An object of type <typeparamref name="T"/> holding the encapsulated data.</returns>
        /// <exception cref="ArgumentException">An enumerable property does not have exactly one generic argument.</exception>
        /// <exception cref="MissingMethodException">The type has no parameterless constructor.</exception>
        /// <exception cref="MissingXPathException">The type lacks <see cref="HasXPathAttribute"/> or has no property with <see cref="XPathAttribute"/>.</exception>
        /// <exception cref="XPathException">An XPath expression on a property is syntactically invalid.</exception>
        /// <exception cref="NodeNotFoundException">No node matched a property's XPath and the property is not marked with <see cref="SkipNodeNotFoundAttribute"/>.</exception>
        /// <exception cref="NodeAttributeNotFoundException">The requested HTML attribute is missing on a matched node.</exception>
        /// <exception cref="FormatException">A selected string could not be converted to the property type.</exception>
        /// <exception cref="Exception">Any other conversion failure, or an enumerable property ended up with no items.</exception>
        public T GetEncapsulatedData<T>()
        {
            return (T)GetEncapsulatedData(typeof(T), null);
        }

        /// <summary>
        /// Creates an instance of <typeparamref name="T"/> and fills its XPath-annotated properties
        /// (recursively for nested <see cref="HasXPathAttribute"/> types).
        /// </summary>
        /// <typeparam name="T">Type to fill. It must define <see cref="HasXPathAttribute"/> and have at least one property with <see cref="XPathAttribute"/>.</typeparam>
        /// <param name="htmlDocument">Document to read from. Pass null to use this node's owner document.</param>
        /// <returns>An object of type <typeparamref name="T"/> holding the encapsulated data.</returns>
        /// <exception cref="ArgumentException">An enumerable property does not have exactly one generic argument.</exception>
        /// <exception cref="MissingMethodException">The type has no parameterless constructor.</exception>
        /// <exception cref="MissingXPathException">The type lacks <see cref="HasXPathAttribute"/> or has no property with <see cref="XPathAttribute"/>.</exception>
        /// <exception cref="XPathException">An XPath expression on a property is syntactically invalid.</exception>
        /// <exception cref="NodeNotFoundException">No node matched a property's XPath and the property is not marked with <see cref="SkipNodeNotFoundAttribute"/>.</exception>
        /// <exception cref="NodeAttributeNotFoundException">The requested HTML attribute is missing on a matched node.</exception>
        /// <exception cref="FormatException">A selected string could not be converted to the property type.</exception>
        /// <exception cref="Exception">Any other conversion failure, or an enumerable property ended up with no items.</exception>
        public T GetEncapsulatedData<T>(HtmlDocument htmlDocument)
        {
            return (T)GetEncapsulatedData(typeof(T), htmlDocument);
        }

        /// <summary>
        /// Creates an instance of <paramref name="targetType"/> and fills its XPath-annotated properties
        /// (recursively for nested <see cref="HasXPathAttribute"/> types).
        /// </summary>
        /// <param name="targetType">Type to fill. It must define <see cref="HasXPathAttribute"/> and have at least one property with <see cref="XPathAttribute"/>.</param>
        /// <param name="htmlDocument">Document to read from. Pass null to use this node's owner document.</param>
        /// <returns>An object of type <paramref name="targetType"/> holding the encapsulated data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="targetType"/> is null.</exception>
        /// <exception cref="ArgumentException">An enumerable property does not have exactly one generic argument.</exception>
        /// <exception cref="MissingMethodException">The type has no parameterless constructor.</exception>
        /// <exception cref="MissingXPathException">The type lacks <see cref="HasXPathAttribute"/> or has no property with <see cref="XPathAttribute"/>.</exception>
        /// <exception cref="XPathException">An XPath expression on a property is syntactically invalid.</exception>
        /// <exception cref="NodeNotFoundException">No node matched a property's XPath and the property is not marked with <see cref="SkipNodeNotFoundAttribute"/>.</exception>
        /// <exception cref="NodeAttributeNotFoundException">The requested HTML attribute is missing on a matched node.</exception>
        /// <exception cref="FormatException">A selected string could not be converted to the property type.</exception>
        /// <exception cref="Exception">Any other conversion failure, or an enumerable property ended up with no items.</exception>
        public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = null)
        {
            if (targetType == null)
            {
                throw new ArgumentNullException(nameof(targetType), "Parameter targetType is null");
            }

            HtmlDocument source = htmlDocument ?? OwnerDocument;

            // The instance is created before the HasXPath check, so a type failing both checks
            // reports the missing constructor first.
            if (!targetType.IsInstantiable())
            {
                throw new MissingMethodException("Parameterless Constructor excpected for " + targetType.FullName);
            }

            object targetObject = Activator.CreateInstance(targetType);

            if (!targetType.IsDefinedAttribute(typeof(HasXPathAttribute)))
            {
                throw new MissingXPathException("Type T must define HasXPath attribute and include properties with XPath attribute.");
            }

            // Materialize once: the helper returns a lazy iterator that would otherwise be
            // evaluated twice (once for counting, once for the loop below).
            List<PropertyInfo> validProperties = new List<PropertyInfo>(targetType.GetPropertiesDefinedXPath());

            if (validProperties.Count == 0)
            {
                throw new MissingXPathException("Type " + targetType.FullName + " defined HasXPath Attribute but it does not have any property with XPath Attribte.");
            }

            foreach (PropertyInfo propertyInfo in validProperties)
            {
                XPathAttribute xPathAttribute = (XPathAttribute)propertyInfo.GetCustomAttributes(typeof(XPathAttribute), false)[0];

                if (propertyInfo.IsIEnumerable())
                {
                    EncapsulateEnumerableProperty(targetObject, propertyInfo, xPathAttribute, source);
                }
                else
                {
                    EncapsulateSingleProperty(targetObject, propertyInfo, xPathAttribute, source);
                }
            }

            return targetObject;
        }

        // Binds one non-enumerable property: either a nested HasXPath object or a converted scalar value.
        private void EncapsulateSingleProperty(object targetObject, PropertyInfo propertyInfo, XPathAttribute xPathAttribute, HtmlDocument source)
        {
            string propertyDescription = propertyInfo.PropertyType.FullName + " " + propertyInfo.Name;

            HtmlNode htmlNode;

            try
            {
                htmlNode = source.DocumentNode.SelectSingleNode(xPathAttribute.XPath);
            }
            catch (XPathException ex)
            {
                throw new XPathException(ex.Message + " That means you have a syntax error in XPath property of this Property : " + propertyDescription, ex);
            }
            catch (Exception ex)
            {
                throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " + propertyDescription, ex);
            }

            if (htmlNode == null)
            {
                if (propertyInfo.IsDefined(typeof(SkipNodeNotFoundAttribute), false))
                {
                    // Property opted out of the error: leave it at its default value.
                    return;
                }

                throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " + propertyDescription);
            }

            // Nested HasXPath type: recurse on a fresh document built from the matched node's inner HTML.
            if (propertyInfo.PropertyType.IsDefinedAttribute(typeof(HasXPathAttribute)))
            {
                HtmlDocument innerHtmlDocument = new HtmlDocument();
                innerHtmlDocument.LoadHtml(htmlNode.InnerHtml);

                object nested = GetEncapsulatedData(propertyInfo.PropertyType, innerHtmlDocument);
                propertyInfo.SetValue(targetObject, nested, null);
                return;
            }

            // Simple type: take either the node content or one of its HTML attributes.
            string result;

            if (xPathAttribute.AttributeName == null)
            {
                result = Tools.GetNodeValueBasedOnXPathReturnType<string>(htmlNode, xPathAttribute);
            }
            else
            {
                // A null default is requested, so a null result is treated as "value not found".
                result = htmlNode.GetAttributeValue(xPathAttribute.AttributeName, null);
            }

            if (result == null)
            {
                throw new NodeAttributeNotFoundException("Can not find " + xPathAttribute.AttributeName + " Attribute in " + htmlNode.Name + " related to " + propertyDescription);
            }

            object converted;

            try
            {
                converted = Convert.ChangeType(result, propertyInfo.PropertyType);
            }
            catch (FormatException ex)
            {
                throw new FormatException("Can not convert Invalid string to " + propertyDescription, ex);
            }
            catch (Exception ex)
            {
                throw new Exception("Unhandled Exception : " + ex.Message, ex);
            }

            propertyInfo.SetValue(targetObject, converted, null);
        }

        // Binds one enumerable property by building a List of its single generic argument from all matched nodes.
        private void EncapsulateEnumerableProperty(object targetObject, PropertyInfo propertyInfo, XPathAttribute xPathAttribute, HtmlDocument source)
        {
            string propertyDescription = propertyInfo.PropertyType.FullName + " " + propertyInfo.Name;

            IList<Type> genericTypes = propertyInfo.GetGenericTypes() as IList<Type>;

            // Exactly one generic argument is supported (so not something like a Dictionary).
            if (genericTypes == null || genericTypes.Count != 1)
            {
                throw new ArgumentException(propertyInfo.Name + " should have one generic argument.");
            }

            Type elementType = genericTypes[0];

            HtmlNodeCollection nodeCollection;

            try
            {
                nodeCollection = source.DocumentNode.SelectNodes(xPathAttribute.XPath);
            }
            catch (XPathException ex)
            {
                throw new XPathException(ex.Message + " That means you have a syntax error in XPath property of this Property : " + propertyDescription, ex);
            }
            catch (Exception ex)
            {
                throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " + propertyDescription, ex);
            }

            if (nodeCollection == null || nodeCollection.Count == 0)
            {
                if (propertyInfo.IsDefined(typeof(SkipNodeNotFoundAttribute), false))
                {
                    // Property opted out of the error: leave it at its default value.
                    return;
                }

                throw new NodeNotFoundException("Cannot find node with giving XPath to bind to " + propertyDescription);
            }

            IList result = elementType.CreateIListOfType();

            if (elementType.IsDefinedAttribute(typeof(HasXPathAttribute)))
            {
                // Element type is itself a HasXPath type: recurse once per matched node.
                foreach (HtmlNode node in nodeCollection)
                {
                    HtmlDocument innerHtmlDocument = new HtmlDocument();
                    innerHtmlDocument.LoadHtml(node.InnerHtml);

                    result.Add(GetEncapsulatedData(elementType, innerHtmlDocument));
                }
            }
            else if (xPathAttribute.AttributeName == null)
            {
                // Element type is a simple type filled from node content.
                try
                {
                    result = Tools.GetNodesValuesBasedOnXPathReturnType(nodeCollection, xPathAttribute, elementType);
                }
                catch (FormatException ex)
                {
                    throw new FormatException("Can not convert Invalid string in node collection to " + elementType.FullName + " " + propertyInfo.Name, ex);
                }
                catch (Exception ex)
                {
                    throw new Exception("Unhandled Exception : " + ex.Message, ex);
                }
            }
            else
            {
                // Element type is a simple type filled from an HTML attribute of each node.
                foreach (HtmlNode node in nodeCollection)
                {
                    result.Add(ConvertNodeAttribute(node, xPathAttribute, elementType, propertyInfo));
                }
            }

            if (result == null || result.Count == 0)
            {
                throw new Exception("Cannot fill " + propertyDescription + " because it is null.");
            }

            propertyInfo.SetValue(targetObject, result, null);
        }

        // Reads the configured HTML attribute from a node and converts it to the element type of an enumerable property.
        private static object ConvertNodeAttribute(HtmlNode node, XPathAttribute xPathAttribute, Type elementType, PropertyInfo propertyInfo)
        {
            // A null default is requested, so a null result is treated as "attribute not found".
            string nodeAttributeValue = node.GetAttributeValue(xPathAttribute.AttributeName, null);

            if (nodeAttributeValue == null)
            {
                throw new NodeAttributeNotFoundException("Can not find " + xPathAttribute.AttributeName + " Attribute in " + node.Name + " related to " + propertyInfo.PropertyType.FullName + " " + propertyInfo.Name);
            }

            try
            {
                return Convert.ChangeType(nodeAttributeValue, elementType);
            }
            catch (FormatException ex)
            {
                throw new FormatException("Can not convert Invalid string to " + elementType.FullName + " " + propertyInfo.Name, ex);
            }
            catch (Exception ex)
            {
                throw new Exception("Unhandled Exception : " + ex.Message, ex);
            }
        }
    }

    /// <summary>
    /// Reflection and conversion helpers used by the GetEncapsulatedData methods.
    /// </summary>
    internal static class Tools
    {
        /// <summary>
        /// Determines whether a type defines an attribute, on both the TypeInfo-based and the classic reflection API.
        /// </summary>
        /// <param name="type">Type to inspect.</param>
        /// <param name="attributeType">Attribute type to look for.</param>
        /// <returns>True if <paramref name="type"/> defines <paramref name="attributeType"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="type"/> or <paramref name="attributeType"/> is null.</exception>
        internal static bool IsDefinedAttribute(this Type type, Type attributeType)
        {
            if (type == null)
            {
                throw new ArgumentNullException(nameof(type), "Parameter type is null when checking type defined attributeType or not.");
            }

            if (attributeType == null)
            {
                throw new ArgumentNullException(nameof(attributeType), "Parameter attributeType is null when checking type defined attributeType or not.");
            }

#if NETSTANDARD1_3 || NETSTANDARD1_6
            return type.GetTypeInfo().IsDefined(attributeType);
#else
            return type.IsDefined(attributeType, false);
#endif
        }

        /// <summary>
        /// Retrieves the properties of a type that define <see cref="XPathAttribute"/>.
        /// </summary>
        /// <param name="type">Type whose properties are inspected.</param>
        /// <returns>A lazily evaluated sequence of the matching property infos.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
        internal static IEnumerable<PropertyInfo> GetPropertiesDefinedXPath(this Type type)
        {
            if (type == null)
            {
                throw new ArgumentNullException(nameof(type), "Parameter type is null while retrieving properties defined XPathAttribute of Type type.");
            }

#if NETSTANDARD1_3 || NETSTANDARD1_6
            PropertyInfo[] properties = type.GetTypeInfo().GetProperties();
#else
            PropertyInfo[] properties = type.GetProperties();
#endif

            return properties.HAPWhere(x => x.IsDefined(typeof(XPathAttribute), false));
        }

        /// <summary>
        /// Determines whether a property's type implements <see cref="IEnumerable"/>.
        /// <see cref="string"/> implements it too but is deliberately reported as non-enumerable.
        /// </summary>
        /// <param name="propertyInfo">The property to test.</param>
        /// <returns>True if the property type is enumerable and is not <see cref="string"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="propertyInfo"/> is null.</exception>
        internal static bool IsIEnumerable(this PropertyInfo propertyInfo)
        {
            if (propertyInfo == null)
            {
                throw new ArgumentNullException(nameof(propertyInfo), "Parameter propertyInfo is null while checking propertyInfo for being IEnumerable or not.");
            }

            if (propertyInfo.PropertyType == typeof(string))
            {
                return false;
            }

#if NETSTANDARD1_3 || NETSTANDARD1_6
            return typeof(IEnumerable).GetTypeInfo().IsAssignableFrom(propertyInfo.PropertyType);
#else
            return typeof(IEnumerable).IsAssignableFrom(propertyInfo.PropertyType);
#endif
        }

        /// <summary>
        /// Returns the generic type arguments of a property's type.
        /// </summary>
        /// <param name="propertyInfo">Property whose type is inspected.</param>
        /// <returns>The generic arguments of the property type.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="propertyInfo"/> is null.</exception>
        internal static IEnumerable<Type> GetGenericTypes(this PropertyInfo propertyInfo)
        {
            if (propertyInfo == null)
            {
                throw new ArgumentNullException(nameof(propertyInfo), "Parameter propertyInfo is null while Getting generic types of Property.");
            }

#if NETSTANDARD1_3 || NETSTANDARD1_6
            return propertyInfo.PropertyType.GetTypeInfo().GetGenericArguments();
#else
            return propertyInfo.PropertyType.GetGenericArguments();
#endif
        }

        /// <summary>
        /// Finds a method of a type by its name.
        /// </summary>
        /// <param name="type">Type that declares the requested method.</param>
        /// <param name="methodName">Name of the requested method.</param>
        /// <returns>Method info of the requested method, as returned by the reflection lookup.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="type"/> is null, or <paramref name="methodName"/> is null or empty.</exception>
        internal static MethodInfo GetMethodByItsName(this Type type, string methodName)
        {
            if (type == null)
            {
                throw new ArgumentNullException(nameof(type), "Parameter type is null while Getting method from it.");
            }

            // An empty name is reported with the same exception type as a null name.
            if (string.IsNullOrEmpty(methodName))
            {
                throw new ArgumentNullException(nameof(methodName), "Parameter methodName is null while Getting method from Type type.");
            }

#if NETSTANDARD1_3 || NETSTANDARD1_6
            return type.GetTypeInfo().GetMethod(methodName);
#else
            return type.GetMethod(methodName);
#endif
        }

        /// <summary>
        /// Creates an empty <see cref="List{T}"/> whose element type is the given type.
        /// </summary>
        /// <param name="type">Element type of the list.</param>
        /// <returns>The new list, exposed through the non-generic <see cref="IList"/> interface.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
        internal static IList CreateIListOfType(this Type type)
        {
            if (type == null)
            {
                throw new ArgumentNullException(nameof(type), "Parameter type is null while creating List.");
            }

            Type constructedListType = typeof(List<>).MakeGenericType(type);
            return Activator.CreateInstance(constructedListType) as IList;
        }

        /// <summary>
        /// Returns the part of a node selected by <see cref="XPathAttribute.NodeReturnType"/>, converted to <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Type the selected string is converted to.</typeparam>
        /// <param name="htmlNode">Node to read from.</param>
        /// <param name="xPathAttribute">Attribute carrying the requested <see cref="ReturnType"/>.</param>
        /// <returns>The selected node content converted to <typeparamref name="T"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlNode"/> or <paramref name="xPathAttribute"/> is null.</exception>
        /// <exception cref="Exception">The requested <see cref="ReturnType"/> is not a defined value.</exception>
        internal static T GetNodeValueBasedOnXPathReturnType<T>(HtmlNode htmlNode, XPathAttribute xPathAttribute)
        {
            if (htmlNode == null)
            {
                throw new ArgumentNullException(nameof(htmlNode), "parameter html node is null");
            }

            if (xPathAttribute == null)
            {
                throw new ArgumentNullException(nameof(xPathAttribute), "parameter xpathAttribute is null");
            }

            string selected = SelectNodePart(htmlNode, xPathAttribute.NodeReturnType);
            return (T)Convert.ChangeType(selected, typeof(T));
        }

        /// <summary>
        /// Returns, for every node of a collection, the part selected by <see cref="XPathAttribute.NodeReturnType"/>,
        /// converted to <paramref name="listGenericType"/>.
        /// </summary>
        /// <param name="htmlNodeCollection">Nodes to read from. Must not be null or empty.</param>
        /// <param name="xPathAttribute">Attribute carrying the requested <see cref="ReturnType"/>.</param>
        /// <param name="listGenericType">Element type of the returned list.</param>
        /// <returns>A list of <paramref name="listGenericType"/> with one converted value per node.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlNodeCollection"/> is null or empty, or <paramref name="xPathAttribute"/> is null.</exception>
        /// <exception cref="Exception">The requested <see cref="ReturnType"/> is not a defined value.</exception>
        internal static IList GetNodesValuesBasedOnXPathReturnType(HtmlNodeCollection htmlNodeCollection, XPathAttribute xPathAttribute, Type listGenericType)
        {
            if (htmlNodeCollection == null || htmlNodeCollection.Count == 0)
            {
                throw new ArgumentNullException(nameof(htmlNodeCollection), "parameter htmlNodeCollection is null or empty.");
            }

            if (xPathAttribute == null)
            {
                throw new ArgumentNullException(nameof(xPathAttribute), "parameter xpathAttribute is null");
            }

            IList result = listGenericType.CreateIListOfType();

            foreach (HtmlNode node in htmlNodeCollection)
            {
                string selected = SelectNodePart(node, xPathAttribute.NodeReturnType);
                result.Add(Convert.ChangeType(selected, listGenericType));
            }

            return result;
        }

        // Maps a ReturnType to the matching HtmlNode string property; shared so both value helpers reject undefined values alike.
        private static string SelectNodePart(HtmlNode htmlNode, ReturnType returnType)
        {
            switch (returnType)
            {
                case ReturnType.InnerHtml:
                    return htmlNode.InnerHtml;
                case ReturnType.InnerText:
                    return htmlNode.InnerText;
                case ReturnType.OuterHtml:
                    return htmlNode.OuterHtml;
                default:
                    throw new Exception("Unsupported ReturnType value : " + returnType);
            }
        }

        /// <summary>
        /// Stand-in for a single-argument Func delegate, used for lambda expressions in this file.
        /// </summary>
        /// <typeparam name="T">Type of the argument.</typeparam>
        /// <typeparam name="TResult">Type of the result.</typeparam>
        /// <param name="arg">The argument.</param>
        /// <returns>The result of the function.</returns>
        internal delegate TResult HAPFunc<T, TResult>(T arg);

        /// <summary>
        /// Lazily filters a sequence, like the LINQ Where operator.
        /// </summary>
        /// <typeparam name="TSource">Element type of the sequence.</typeparam>
        /// <param name="source">Sequence to filter.</param>
        /// <param name="predicate">Test applied to every element.</param>
        /// <returns>The elements for which <paramref name="predicate"/> returns true.</returns>
        internal static IEnumerable<TSource> HAPWhere<TSource>(this IEnumerable<TSource> source, HAPFunc<TSource, bool> predicate)
        {
            foreach (TSource item in source)
            {
                if (predicate(item))
                {
                    yield return item;
                }
            }
        }

        /// <summary>
        /// Checks whether a type exposes a parameterless constructor.
        /// </summary>
        /// <param name="type">Type to inspect.</param>
        /// <returns>True if a parameterless constructor was found.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
        internal static bool IsInstantiable(this Type type)
        {
            if (type == null)
            {
                throw new ArgumentNullException(nameof(type), "type is null");
            }

#if NETSTANDARD1_3 || NETSTANDARD1_6
            return type.GetTypeInfo().DeclaredConstructors.HAPWhere(x => x.GetParameters().Length == 0).CountOfIEnumerable() != 0;
#else
            return type.GetConstructor(Type.EmptyTypes) != null;
#endif
        }

        /// <summary>
        /// Returns the number of elements in a sequence.
        /// </summary>
        /// <typeparam name="T">Element type of the sequence.</typeparam>
        /// <param name="source">Sequence to count.</param>
        /// <returns>The number of elements in <paramref name="source"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        internal static int CountOfIEnumerable<T>(this IEnumerable<T> source)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source), "Parameter source is null while counting the IEnumerable");
            }

            // Collections already know their size; avoid walking them.
            ICollection<T> collection = source as ICollection<T>;
            if (collection != null)
            {
                return collection.Count;
            }

            int counter = 0;
            foreach (T item in source)
            {
                counter++;
            }

            return counter;
        }
    }

    /// <summary>
    /// Specifies which part of an <see cref="HtmlNode"/> is requested.
    /// </summary>
    public enum ReturnType
    {
        /// <summary>
        /// The text between the start and end tags of the object.
        /// </summary>
        InnerText,

        /// <summary>
        /// The HTML between the start and end tags of the object.
        /// </summary>
        InnerHtml,

        /// <summary>
        /// The object and its content in HTML.
        /// </summary>
        OuterHtml
    }

    /// <summary>
    /// Marks a class to show that it has properties that define <see cref="XPathAttribute"/>.
    /// </summary>
    [AttributeUsage(AttributeTargets.Class, Inherited = false, AllowMultiple = false)]
    public sealed class HasXPathAttribute : Attribute
    {
    }

    /// <summary>
    /// Carries the XPath used to find an HTML node, plus either the <see cref="ReturnType"/>
    /// selecting which part of the node is returned or the name of the HTML attribute to read.
    /// </summary>
    [AttributeUsage(AttributeTargets.Property, Inherited = false, AllowMultiple = false)]
    public sealed class XPathAttribute : Attribute
    {
        /// <summary>
        /// XPath expression that is used to find the related HTML node.
        /// </summary>
        public string XPath { get; }

        /// <summary>
        /// Name of the HTML attribute to read, or null to read node content.
        /// </summary>
        public string AttributeName { get; set; }

        /// <summary>
        /// Which part of the node is returned when no attribute name is set.
        /// </summary>
        public ReturnType NodeReturnType { get; set; }

        /// <summary>
        /// Specifies the XPath to find the related HTML node; the inner text is returned.
        /// </summary>
        /// <param name="xpathString">XPath expression.</param>
        public XPathAttribute(string xpathString)
        {
            XPath = xpathString;
            NodeReturnType = ReturnType.InnerText;
        }

        /// <summary>
        /// Specifies the XPath to find the related HTML node and which part of it is returned.
        /// </summary>
        /// <param name="xpathString">XPath expression.</param>
        /// <param name="nodeReturnType">Part of the node to return.</param>
        public XPathAttribute(string xpathString, ReturnType nodeReturnType)
        {
            XPath = xpathString;
            NodeReturnType = nodeReturnType;
        }

        /// <summary>
        /// Specifies the XPath to find the related HTML node and the attribute whose value is returned.
        /// </summary>
        /// <param name="xpathString">XPath expression.</param>
        /// <param name="attributeName">Name of the HTML attribute to read.</param>
        public XPathAttribute(string xpathString, string attributeName)
        {
            XPath = xpathString;
            AttributeName = attributeName;
        }
    }

    /// <summary>
    /// Tagging a property with this attribute makes the encapsulator leave the property at its
    /// default value, instead of throwing, when no node matches its XPath.
    /// </summary>
    [AttributeUsage(AttributeTargets.Property, Inherited = false, AllowMultiple = false)]
    public sealed class SkipNodeNotFoundAttribute : Attribute
    {
    }

    /// <summary>
    /// Exception thrown when no HTML node can be bound to an XPath.
    /// </summary>
    public class NodeNotFoundException : Exception
    {
        /// <summary>
        /// Creates the exception without a message.
        /// </summary>
        public NodeNotFoundException()
        {
        }

        /// <summary>
        /// Creates the exception with a message.
        /// </summary>
        /// <param name="message">Description of the error.</param>
        public NodeNotFoundException(string message) : base(message)
        {
        }

        /// <summary>
        /// Creates the exception with a message and the exception that caused it.
        /// </summary>
        /// <param name="message">Description of the error.</param>
        /// <param name="inner">The causing exception.</param>
        public NodeNotFoundException(string message, Exception inner) : base(message, inner)
        {
        }
    }

    /// <summary>
    /// Exception thrown when the requested HTML attribute cannot be found on a matched node.
    /// </summary>
    public class NodeAttributeNotFoundException : Exception
    {
        /// <summary>
        /// Creates the exception without a message.
        /// </summary>
        public NodeAttributeNotFoundException()
        {
        }

        /// <summary>
        /// Creates the exception with a message.
        /// </summary>
        /// <param name="message">Description of the error.</param>
        public NodeAttributeNotFoundException(string message) : base(message)
        {
        }

        /// <summary>
        /// Creates the exception with a message and the exception that caused it.
        /// </summary>
        /// <param name="message">Description of the error.</param>
        /// <param name="inner">The causing exception.</param>
        public NodeAttributeNotFoundException(string message, Exception inner) : base(message, inner)
        {
        }
    }

    /// <summary>
    /// Exception thrown when a class lacks the HasXPath marker or has no property with an XPath attribute.
    /// </summary>
    public class MissingXPathException : Exception
    {
        /// <summary>
        /// Creates the exception without a message.
        /// </summary>
        public MissingXPathException()
        {
        }

        /// <summary>
        /// Creates the exception with a message.
        /// </summary>
        /// <param name="message">Description of the error.</param>
        public MissingXPathException(string message) : base(message)
        {
        }

        /// <summary>
        /// Creates the exception with a message and the exception that caused it.
        /// </summary>
        /// <param name="message">Description of the error.</param>
        /// <param name="inner">The causing exception.</param>
        public MissingXPathException(string message, Exception inner) : base(message, inner)
        {
        }
    }
}

#if FX20
namespace System.Runtime.CompilerServices
{
    // Declared locally so the compiler accepts extension methods ("this" parameters) in FX20 builds.
    [AttributeUsage(AttributeTargets.Method | AttributeTargets.Class | AttributeTargets.Assembly)]
    public sealed class ExtensionAttribute : Attribute
    {
    }
}
#endif

#endif