// "Therefore those skilled at the unorthodox
// are infinite as heaven and earth,
// inexhaustible as the great rivers.
// When they come to an end,
// they begin again,
// like the days and months;
// they die and are reborn,
// like the four seasons."
//
// - Sun Tsu,
// "The Art of War"
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace TheArtOfDev.HtmlRenderer.Core.Parse
{
/// <summary>
/// Collection of regular expressions used when parsing, plus helpers that run them
/// through a shared cache of <see cref="Regex"/> instances.
/// </summary>
/// <remarks>
/// The regex cache is a plain dictionary with no synchronization; concurrent callers
/// must coordinate access themselves.
/// </remarks>
internal static class RegexParserUtils
{
    #region Fields and Consts

    /// <summary>
    /// Extracts the media types from a media at-rule; e.g. @media print, 3d, screen {
    /// </summary>
    public const string CssMediaTypes = @"@media[^\{\}]*\{";

    /// <summary>
    /// Extracts defined blocks in CSS.
    /// WARNING: Blocks will include blocks inside at-rules.
    /// </summary>
    public const string CssBlocks = @"[^\{\}]*\{[^\{\}]*\}";

    /// <summary>
    /// Extracts a number; e.g. 5, 6, 7.5, 0.9
    /// </summary>
    /// <remarks>
    /// Wrapped in a non-capturing group so that composing it into larger patterns keeps the
    /// alternation scoped and does not shift capture-group numbering. The decimal form is
    /// tried first so "7.5" is extracted whole instead of stopping at "7".
    /// </remarks>
    public const string CssNumber = @"(?:[0-9]*\.[0-9]+|[0-9]+)";

    /// <summary>
    /// Extracts css percentages from the string; e.g. 100% .5% 5.4%
    /// </summary>
    public const string CssPercentage = @"([0-9]+|[0-9]*\.[0-9]+)\%";

    /// <summary>
    /// Extracts CSS lengths; e.g. 9px 3pt .89em
    /// </summary>
    public const string CssLength = @"([0-9]+|[0-9]*\.[0-9]+)(em|ex|px|in|cm|mm|pt|pc)";

    /// <summary>
    /// Extracts line-height values (normal, numbers, lengths, percentages)
    /// </summary>
    /// <remarks>
    /// Lengths and percentages are listed before plain numbers so that "1.5em" or "150%"
    /// is matched in full rather than as the bare number prefix.
    /// </remarks>
    public const string CssLineHeight = "(normal|" + CssLength + "|" + CssPercentage + "|" + CssNumber + ")";

    /// <summary>
    /// Extracts font-family values
    /// </summary>
    public const string CssFontFamily = "(\"[^\"]*\"|'[^']*'|\\S+\\s*)(\\s*\\,\\s*(\"[^\"]*\"|'[^']*'|\\S+))*";

    /// <summary>
    /// Extracts CSS font-styles; e.g. normal italic oblique
    /// </summary>
    public const string CssFontStyle = "(normal|italic|oblique)";

    /// <summary>
    /// Extracts CSS font-variant values; e.g. normal, small-caps
    /// </summary>
    public const string CssFontVariant = "(normal|small-caps)";

    /// <summary>
    /// Extracts font-weight values; e.g. normal, bold, bolder...
    /// </summary>
    public const string CssFontWeight = "(normal|bold|bolder|lighter|100|200|300|400|500|600|700|800|900)";

    /// <summary>
    /// Extracts font sizes: xx-small, larger, small, 34pt, 30%, 2em
    /// </summary>
    public const string CssFontSize = "(" + CssLength + "|" + CssPercentage + "|xx-small|x-small|small|medium|large|x-large|xx-large|larger|smaller)";

    /// <summary>
    /// Gets the font-size[/line-height]? on the font shorthand property.
    /// Check http://www.w3.org/TR/CSS21/fonts.html#font-shorthand
    /// </summary>
    public const string CssFontSizeAndLineHeight = CssFontSize + @"(\/" + CssLineHeight + @")?(\s|$)";

    /// <summary>
    /// the regexes cache that is used by the parser so not to create regex each time
    /// (keyed by the pattern string)
    /// </summary>
    private static readonly Dictionary<string, Regex> _regexes = new Dictionary<string, Regex>();

    #endregion

    /// <summary>
    /// Get CSS at rule from the given stylesheet.
    /// </summary>
    /// <param name="stylesheet">the stylesheet data to retrieve the rule from</param>
    /// <param name="startIdx">
    /// the index to start the search for the rule, on return will be the value of the end of the found rule
    /// (the index of its closing brace). It is set to -1 when no '@' is found, and to the index of the '@'
    /// when the rule has no opening brace or its braces are not balanced.
    /// </param>
    /// <returns>the found at rule or null if not exists</returns>
    public static string GetCssAtRules(string stylesheet, ref int startIdx)
    {
        startIdx = stylesheet.IndexOf('@', startIdx);
        if (startIdx < 0)
        {
            return null;
        }

        int endIdx = stylesheet.IndexOf('{', startIdx);
        if (endIdx < 0)
        {
            return null;
        }

        // Walk forward from the opening brace tracking the nesting depth. The index is advanced
        // and bounds-checked before it is used, so an unclosed rule stops at the end of the
        // text instead of reading past it.
        int depth = 1;
        while (depth > 0 && ++endIdx < stylesheet.Length)
        {
            char current = stylesheet[endIdx];
            if (current == '{')
            {
                depth++;
            }
            else if (current == '}')
            {
                depth--;
            }
        }

        if (depth > 0)
        {
            // Ran out of text before the braces balanced: no complete at-rule here.
            return null;
        }

        // endIdx is the matching closing brace; include it in the returned rule.
        string atrule = stylesheet.Substring(startIdx, endIdx - startIdx + 1);
        startIdx = endIdx;
        return atrule;
    }

    /// <summary>
    /// Extracts matches from the specified source
    /// </summary>
    /// <param name="regex">Regular expression to extract matches</param>
    /// <param name="source">Source to extract matches</param>
    /// <returns>Collection of matches</returns>
    public static MatchCollection Match(string regex, string source)
    {
        var r = GetRegex(regex);
        return r.Matches(source);
    }

    /// <summary>
    /// Searches the specified regex on the source
    /// </summary>
    /// <param name="regex">Regular expression to search for</param>
    /// <param name="source">Source to search in</param>
    /// <returns>the text of the first match, or null if there is no match</returns>
    public static string Search(string regex, string source)
    {
        int position;
        return Search(regex, source, out position);
    }

    /// <summary>
    /// Searches the specified regex on the source
    /// </summary>
    /// <param name="regex">Regular expression to search for</param>
    /// <param name="source">Source to search in</param>
    /// <param name="position">on return, the index of the first match in the source, or -1 if there is no match</param>
    /// <returns>the text of the first match, or null if there is no match</returns>
    public static string Search(string regex, string source, out int position)
    {
        // Only the first match is needed, so ask the regex for just that one rather than
        // building (and counting) the full collection of matches.
        var firstMatch = GetRegex(regex).Match(source);
        if (firstMatch.Success)
        {
            position = firstMatch.Index;
            return firstMatch.Value;
        }

        position = -1;
        return null;
    }

    /// <summary>
    /// Get regex instance for the given regex string.
    /// </summary>
    /// <param name="regex">the regex string to use</param>
    /// <returns>the regex instance (created case-insensitive and single-line on first use, then cached)</returns>
    private static Regex GetRegex(string regex)
    {
        Regex r;
        if (!_regexes.TryGetValue(regex, out r))
        {
            r = new Regex(regex, RegexOptions.IgnoreCase | RegexOptions.Singleline);
            _regexes[regex] = r;
        }
        return r;
    }
}
}