366 lines
21 KiB
XML
366 lines
21 KiB
XML
<?xml version="1.0"?>
|
|
<doc>
|
|
<assembly>
|
|
<name>Lucene.Net.Sandbox</name>
|
|
</assembly>
|
|
<members>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.DuplicateFilter">
|
|
<summary>
|
|
Filter to remove duplicate values from search results.
|
|
<para/>
|
|
WARNING: for this to work correctly, you may have to wrap
|
|
your reader as it cannot currently deduplicate across different
|
|
index segments.
|
|
</summary>
|
|
<seealso cref="T:Lucene.Net.Index.SlowCompositeReaderWrapper"/>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.KeepMode">
|
|
<summary>
|
|
KeepMode determines which document id to consider as the master, all others being
|
|
identified as duplicates. Selecting the "first occurrence" can potentially save on IO.
|
|
</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.ProcessingMode">
|
|
<summary>
|
|
"Full" processing mode starts by setting all bits to false and only setting bits
|
|
for documents that contain the given field and are identified as non-duplicates.
|
|
<para/>
|
|
"Fast" processing sets all bits to true then unsets all duplicate docs found for the
|
|
given field. This approach avoids the need to read DocsEnum for terms that are seen
|
|
to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially
|
|
faster approach, the downside is that bitsets produced will include bits set for
|
|
documents that do not actually contain the field given.
|
|
</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.ProcessingMode.PM_FULL_VALIDATION">
|
|
<summary>
|
|
"Full" processing mode starts by setting all bits to false and only setting bits
|
|
for documents that contain the given field and are identified as non-duplicates.
|
|
</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.ProcessingMode.PM_FAST_INVALIDATION">
|
|
<summary>
|
|
"Fast" processing sets all bits to true then unsets all duplicate docs found for the
|
|
given field. This approach avoids the need to read DocsEnum for terms that are seen
|
|
to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially
|
|
faster approach, the downside is that bitsets produced will include bits set for
|
|
documents that do not actually contain the field given.
|
|
</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.FuzzyLikeThisQuery">
|
|
<summary>
|
|
Fuzzifies ALL terms provided as strings and then picks the best n differentiating terms.
|
|
In effect this mixes the behaviour of <see cref="T:Lucene.Net.Search.FuzzyQuery"/> and MoreLikeThis but with special consideration
|
|
of fuzzy scoring factors.
|
|
This generally produces good results for queries where users may provide details in a number of
|
|
fields and have no knowledge of boolean query syntax and also want a degree of fuzzy matching and
|
|
a fast query.
|
|
<para/>
|
|
For each source term the fuzzy variants are held in a <see cref="T:Lucene.Net.Search.BooleanQuery"/> with no coord factor (because
|
|
we are not looking for matches on multiple variants in any one doc). Additionally, a specialized
|
|
<see cref="T:Lucene.Net.Search.TermQuery"/> is used for variants and does not use that variant term's IDF because this would favour rarer
|
|
terms, e.g. misspellings. Instead, all variants use the same IDF ranking (the one for the source query
|
|
term) and this is factored into the variant's boost. If the source query term does not exist in the
|
|
index the average IDF of the variants is used.
|
|
</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.FuzzyLikeThisQuery.#ctor(System.Int32,Lucene.Net.Analysis.Analyzer)">
|
|
<summary>
|
|
|
|
</summary>
|
|
<param name="maxNumTerms">The total number of term clauses that will appear once rewritten as a <see cref="T:Lucene.Net.Search.BooleanQuery"/></param>
|
|
<param name="analyzer"></param>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.FuzzyLikeThisQuery.AddTerms(System.String,System.String,System.Single,System.Int32)">
|
|
<summary>
|
|
Adds user input for "fuzzification"
|
|
</summary>
|
|
<param name="queryString">The string which will be parsed by the analyzer and for which fuzzy variants will be parsed</param>
|
|
<param name="fieldName">The name of the field to query.</param>
|
|
<param name="minSimilarity">The minimum similarity of the term variants (see <see cref="T:Lucene.Net.Search.FuzzyTermsEnum"/>)</param>
|
|
<param name="prefixLength">Length of required common prefix on variant terms (see <see cref="T:Lucene.Net.Search.FuzzyTermsEnum"/>)</param>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.FuzzyLikeThisQuery.ScoreTermQueue.LessThan(Lucene.Net.Sandbox.Queries.FuzzyLikeThisQuery.ScoreTerm,Lucene.Net.Sandbox.Queries.FuzzyLikeThisQuery.ScoreTerm)">
|
|
<summary>
|
|
(non-Javadoc)
|
|
<see cref="M:Lucene.Net.Util.PriorityQueue`1.LessThan(`0,`0)"/>
|
|
</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.FuzzyLikeThisQuery.ToString(System.String)">
|
|
<summary>
|
|
(non-Javadoc)
|
|
<see cref="M:Lucene.Net.Search.Query.ToString(System.String)"/>
|
|
</summary>
|
|
<param name="field"></param>
|
|
<returns></returns>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery">
|
|
<summary>
|
|
Implements the classic fuzzy search query. The similarity measurement
|
|
is based on the Levenshtein (edit distance) algorithm.
|
|
<para/>
|
|
Note that, unlike <see cref="T:Lucene.Net.Search.FuzzyQuery"/>, this query will silently allow
|
|
for a (possibly huge) number of edit distances in comparisons, and may
|
|
be extremely slow (comparing every term in the index).
|
|
</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.#ctor(Lucene.Net.Index.Term,System.Single,System.Int32,System.Int32)">
|
|
<summary>
|
|
Create a new <see cref="T:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery"/> that will match terms with a similarity
|
|
of at least <paramref name="minimumSimilarity"/> to <paramref name="term"/>.
|
|
If a <paramref name="prefixLength"/> > 0 is specified, a common prefix
|
|
of that length is also required.
|
|
</summary>
|
|
<param name="term">the term to search for</param>
|
|
<param name="minimumSimilarity">
|
|
a value between 0 and 1 to set the required similarity
|
|
between the query term and the matching terms. For example, for a
|
|
<paramref name="minimumSimilarity"/> of <c>0.5</c> a term of the same length
|
|
as the query term is considered similar to the query term if the edit distance
|
|
between both terms is less than <c>length(term)*0.5</c>
|
|
<para/>
|
|
Alternatively, if <paramref name="minimumSimilarity"/> is >= 1f, it is interpreted
|
|
as a pure Levenshtein edit distance. For example, a value of <c>2f</c>
|
|
will match all terms within an edit distance of <c>2</c> from the
|
|
query term. Edit distances specified in this way may not be fractional.
|
|
</param>
|
|
<param name="prefixLength">length of common (non-fuzzy) prefix</param>
|
|
<param name="maxExpansions">
|
|
the maximum number of terms to match. If this number is
|
|
greater than <see cref="P:Lucene.Net.Search.BooleanQuery.MaxClauseCount"/> when the query is rewritten,
|
|
then the maxClauseCount will be used instead.
|
|
</param>
|
|
<exception cref="T:System.ArgumentException">
|
|
if <paramref name="minimumSimilarity"/> is &gt;= 1 or &lt; 0
|
|
or if <paramref name="prefixLength"/> &lt; 0
|
|
</exception>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.#ctor(Lucene.Net.Index.Term,System.Single,System.Int32)">
|
|
<summary>
|
|
Calls <see cref="M:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.#ctor(Lucene.Net.Index.Term,System.Single,System.Int32,System.Int32)">SlowFuzzyQuery(term, minimumSimilarity, prefixLength, defaultMaxExpansions)</see>.
|
|
</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.#ctor(Lucene.Net.Index.Term,System.Single)">
|
|
<summary>
|
|
Calls <see cref="M:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.#ctor(Lucene.Net.Index.Term,System.Single,System.Int32,System.Int32)">SlowFuzzyQuery(term, minimumSimilarity, 0, defaultMaxExpansions)</see>.
|
|
</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.#ctor(Lucene.Net.Index.Term)">
|
|
<summary>
|
|
Calls <see cref="M:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.#ctor(Lucene.Net.Index.Term,System.Single,System.Int32,System.Int32)">SlowFuzzyQuery(term, defaultMinSimilarity, 0, defaultMaxExpansions)</see>.
|
|
</summary>
|
|
</member>
|
|
<member name="P:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.MinSimilarity">
|
|
<summary>
|
|
Gets the minimum similarity that is required for this query to match.
|
|
Returns float value between 0.0 and 1.0.
|
|
</summary>
|
|
</member>
|
|
<member name="P:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.PrefixLength">
|
|
<summary>
|
|
Gets the non-fuzzy prefix length. This is the number of characters at the start
|
|
of a term that must be identical (not fuzzy) to the query term if the query
|
|
is to match that term.
|
|
</summary>
|
|
</member>
|
|
<member name="P:Lucene.Net.Sandbox.Queries.SlowFuzzyQuery.Term">
|
|
<summary>
|
|
Gets the pattern term.
|
|
</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum">
|
|
<summary>
|
|
Potentially slow fuzzy <see cref="T:Lucene.Net.Index.TermsEnum"/> for enumerating all terms that are similar
|
|
to the specified filter term.
|
|
<para/>
|
|
If the minSimilarity or maxEdits is greater than the Automaton's
|
|
allowable range, this backs off to the classic (brute force)
|
|
fuzzy terms enum method by calling <see cref="M:Lucene.Net.Search.FuzzyTermsEnum.GetAutomatonEnum(System.Int32,Lucene.Net.Util.BytesRef)"/>.
|
|
<para/>
|
|
Term enumerations are always ordered by
|
|
<see cref="P:Lucene.Net.Search.FuzzyTermsEnum.Comparer"/>. Each term in the enumeration is
|
|
greater than all that precede it.
|
|
</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum">
|
|
<summary>
|
|
Implement fuzzy enumeration with linear brute force.
|
|
</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.d">
|
|
<summary>
|
|
Allows us to save the time required to create a new array
|
|
every time similarity is called.
|
|
</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.text">
|
|
<summary>this is the text, minus the prefix</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.#ctor(Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum)">
|
|
<summary>
|
|
Constructor for enumeration of all terms from specified <c>reader</c> which share a prefix of
|
|
length <c>prefixLength</c> with <c>term</c> and which have a fuzzy similarity >
|
|
<c>minSimilarity</c>.
|
|
<para/>
|
|
After calling the constructor the enumeration is already pointing to the first
|
|
valid term if such a term exists.
|
|
</summary>
|
|
<exception cref="T:System.IO.IOException">If there is a low-level I/O error.</exception>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.utf32">
|
|
<summary>used for unicode conversion from BytesRef byte[] to int[]</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.Accept(Lucene.Net.Util.BytesRef)">
|
|
<summary>
|
|
<para>
|
|
The termCompare method in FuzzyTermEnum uses Levenshtein distance to
|
|
calculate the distance between the given term and the comparing term.
|
|
</para>
|
|
<para>
|
|
If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison.
|
|
Otherwise, this method uses the following logic to calculate similarity.
|
|
<code>
|
|
similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
|
|
</code>
|
|
where distance is the Levenshtein distance for the two words.
|
|
</para>
|
|
</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.CalcDistance(System.Int32[],System.Int32,System.Int32)">
|
|
<summary>
|
|
<para>
|
|
<see cref="M:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.CalcDistance(System.Int32[],System.Int32,System.Int32)"/> returns the Levenshtein distance between the query term
|
|
and the target term.
|
|
</para>
|
|
<para>
|
|
Embedded within this algorithm is a fail-fast Levenshtein distance
|
|
algorithm. The fail-fast algorithm differs from the standard Levenshtein
|
|
distance algorithm in that it is aborted if it is discovered that the
|
|
minimum distance between the words is greater than some threshold.
|
|
</para>
|
|
<para>
|
|
Levenshtein distance (also known as edit distance) is a measure of similarity
|
|
between two strings where the distance is measured as the number of character
|
|
deletions, insertions or substitutions required to transform one string to
|
|
the other string.
|
|
</para>
|
|
</summary>
|
|
<param name="target">the target word or phrase</param>
|
|
<param name="offset">the offset at which to start the comparison</param>
|
|
<param name="length">the length of what's left of the string to compare</param>
|
|
<returns>
|
|
the number of edits or <see cref="F:System.Int32.MaxValue"/> if the edit distance is
|
|
greater than maxDistance.
|
|
</returns>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SlowFuzzyTermsEnum.LinearFuzzyTermsEnum.CalculateMaxDistance(System.Int32)">
|
|
<summary>
|
|
The max Distance is the maximum Levenshtein distance for the text
|
|
compared to some other value that results in score that is
|
|
better than the minimum similarity.
|
|
</summary>
|
|
<param name="m">the length of the "other value"</param>
|
|
<returns>the maximum Levenshtein distance that we care about</returns>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SortedSetSortField">
|
|
<summary>
|
|
SortField for <see cref="T:Lucene.Net.Index.SortedSetDocValues"/>.
|
|
<para/>
|
|
A <see cref="T:Lucene.Net.Index.SortedSetDocValues"/> contains multiple values for a field, so sorting with
|
|
this technique "selects" a value as the representative sort value for the document.
|
|
<para/>
|
|
By default, the minimum value in the set is selected as the sort value, but
|
|
this can be customized. Selectors other than the default do have some limitations
|
|
(see below) to ensure that all selections happen in constant-time for performance.
|
|
<para/>
|
|
Like sorting by string, this also supports sorting missing values as first or last,
|
|
via <see cref="M:Lucene.Net.Search.SortField.SetMissingValue(System.Object)"/>.
|
|
<para/>
|
|
Limitations:
|
|
<list type="bullet">
|
|
<item><description>
|
|
Fields containing <see cref="F:System.Int32.MaxValue"/> or more unique values
|
|
are unsupported.
|
|
</description></item>
|
|
<item><description>
|
|
Selectors other than the default <see cref="F:Lucene.Net.Sandbox.Queries.Selector.MIN"/> require
|
|
optional codec support. However several codecs provided by Lucene,
|
|
including the current default codec, support this.
|
|
</description></item>
|
|
</list>
|
|
</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SortedSetSortField.#ctor(System.String,System.Boolean)">
|
|
<summary>
|
|
Creates a sort, possibly in reverse, by the minimum value in the set
|
|
for the document.
|
|
</summary>
|
|
<param name="field">Name of field to sort by. Must not be null.</param>
|
|
<param name="reverse">True if natural order should be reversed.</param>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SortedSetSortField.#ctor(System.String,System.Boolean,Lucene.Net.Sandbox.Queries.Selector)">
|
|
<summary>
|
|
Creates a sort, possibly in reverse, specifying how the sort value from
|
|
the document's set is selected.
|
|
</summary>
|
|
<param name="field">Name of field to sort by. Must not be null.</param>
|
|
<param name="reverse">True if natural order should be reversed.</param>
|
|
<param name="selector">
|
|
custom selector for choosing the sort value from the set.
|
|
<para/>
|
|
NOTE: selectors other than <see cref="F:Lucene.Net.Sandbox.Queries.Selector.MIN"/> require optional codec support.
|
|
</param>
|
|
</member>
|
|
<member name="P:Lucene.Net.Sandbox.Queries.SortedSetSortField.Selector">
|
|
<summary>Returns the selector in use for this sort</summary>
|
|
</member>
|
|
<member name="M:Lucene.Net.Sandbox.Queries.SortedSetSortField.SetMissingValue(System.Object)">
|
|
<summary>
|
|
Set how missing values (the empty set) are sorted.
|
|
<para/>
|
|
Note that this must be <see cref="F:Lucene.Net.Search.SortField.STRING_FIRST"/> or
|
|
<see cref="F:Lucene.Net.Search.SortField.STRING_LAST"/>.
|
|
</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SortedSetSortField.MinValue">
|
|
<summary>Wraps a <see cref="T:Lucene.Net.Index.SortedSetDocValues"/> and returns the first ordinal (min)</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SortedSetSortField.MaxValue">
|
|
<summary>Wraps a <see cref="T:Lucene.Net.Index.SortedSetDocValues"/> and returns the last ordinal (max)</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SortedSetSortField.MiddleMinValue">
|
|
<summary>Wraps a <see cref="T:Lucene.Net.Index.SortedSetDocValues"/> and returns the middle ordinal (or min of the two)</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.SortedSetSortField.MiddleMaxValue">
|
|
<summary>Wraps a <see cref="T:Lucene.Net.Index.SortedSetDocValues"/> and returns the middle ordinal (or max of the two)</summary>
|
|
</member>
|
|
<member name="T:Lucene.Net.Sandbox.Queries.Selector">
|
|
<summary>Selects a value from the document's set to use as the sort value</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.Selector.MIN">
|
|
<summary>
|
|
Selects the minimum value in the set
|
|
</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.Selector.MAX">
|
|
<summary>
|
|
Selects the maximum value in the set
|
|
</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.Selector.MIDDLE_MIN">
|
|
<summary>
|
|
Selects the middle value in the set.
|
|
<para/>
|
|
If the set has an even number of values, the lower of the middle two is chosen.
|
|
</summary>
|
|
</member>
|
|
<member name="F:Lucene.Net.Sandbox.Queries.Selector.MIDDLE_MAX">
|
|
<summary>
|
|
Selects the middle value in the set.
|
|
<para/>
|
|
If the set has an even number of values, the higher of the middle two is chosen.
|
|
</summary>
|
|
</member>
|
|
</members>
|
|
</doc>
|