Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,31 @@ public Analyzer getAnalyzer(NodePath nodePath) {
return analyzers.getDefaultAnalyzer();
}


public Analyzer getAnalyzer(String field) {
LuceneIndexConfig config = namedIndexes.get(field);
if (config != null) {
String id = config.getAnalyzerId();
if (id != null)
return analyzers.getAnalyzerById(config.getAnalyzerId());
String id = config != null ? config.getAnalyzerId() : null;
if (id == null)
return analyzers.getDefaultAnalyzer();

final String indexSuffix = ":index";
if (id.endsWith(indexSuffix)) {
// Substitute <analyzer-id>:index with <analyzer-id>:query
String qid = id.substring(0, id.length() - indexSuffix.length()) + ":query";
Analyzer queryAnalyzer = analyzers.getAnalyzerById(qid);
if (queryAnalyzer != null)
return queryAnalyzer;

LOG.warn(String.format("Failed to substitute %s with %s analyzer", id, qid));
}
return analyzers.getDefaultAnalyzer();
return analyzers.getAnalyzerById(config.getAnalyzerId());
}

/**
 * Looks up an Analyzer defined in this LuceneConfig by its id.
 *
 * @param analyzerId id of the analyzer to look up
 * @return the matching analyzer, or null if no match was found
 */
public Analyzer getAnalyzerById(String analyzerId) {
    final Analyzer match = analyzers.getAnalyzerById(analyzerId);
    return match;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet,
for (QName qname : definedIndexes) {
String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols());
LuceneConfig config = getLuceneConfig(broker, docs);
Analyzer analyzer = getAnalyzer(config,null, qname);
Analyzer analyzer = getQueryAnalyzer(config,null, qname, options);
Query query;
if (queryStr == null) {
query = new ConstantScoreQuery(new FieldValueFilter(field));
Expand Down Expand Up @@ -485,7 +485,7 @@ public NodeSet query(int contextId, DocumentSet docs, NodeSet contextSet,
for (QName qname : definedIndexes) {
String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols());
LuceneConfig config = getLuceneConfig(broker, docs);
analyzer = getAnalyzer(config, null, qname);
analyzer = getQueryAnalyzer(config, null, qname, options);
Query query = queryRoot == null ? new ConstantScoreQuery(new FieldValueFilter(field)) : queryTranslator.parse(field, queryRoot, analyzer, options);
Optional<Map<String, List<String>>> facets = options.getFacets();
if (facets.isPresent() && config != null) {
Expand All @@ -507,7 +507,7 @@ public NodeSet queryField(int contextId, DocumentSet docs, NodeSet contextSet,
final NodeSet resultSet = new NewArrayNodeSet();
final boolean returnAncestor = axis == NodeSet.ANCESTOR;
final LuceneConfig config = getLuceneConfig(broker, docs);
analyzer = getAnalyzer(config, field, null);
analyzer = getQueryAnalyzer(config, field, null, options);
final Query query = queryTranslator.parse(field, queryRoot, analyzer, options);
if (query != null) {
searchAndProcess(contextId, null, docs, contextSet, resultSet,
Expand Down Expand Up @@ -580,7 +580,7 @@ public NodeSet queryField(XQueryContext context, int contextId, DocumentSet docs
NodeSet resultSet = new NewArrayNodeSet();
boolean returnAncestor = axis == NodeSet.ANCESTOR;
LuceneConfig config = getLuceneConfig(context.getBroker(), docs);
Analyzer analyzer = getAnalyzer(config, field, null);
Analyzer analyzer = getQueryAnalyzer(config, field, null, options);
LOG.debug("Using analyzer " + analyzer + " for " + queryString);
QueryParserWrapper parser = getQueryParser(field, analyzer, docs);
options.configureParser(parser.getConfiguration());
Expand Down Expand Up @@ -1064,10 +1064,17 @@ private static boolean matchQName(QName qname, QName candidate) {
*
* @return the analyzer or null
*/
@Nullable protected Analyzer getAnalyzer(LuceneConfig config, String field, QName qname) {
@Nullable protected Analyzer getQueryAnalyzer(LuceneConfig config, String field, QName qname, QueryOptions opts) {
if (config != null) {
Analyzer analyzer;
if (field == null) {
if (opts.getQueryAnalyzerId() != null) {
analyzer = config.getAnalyzerById(opts.getQueryAnalyzerId());
if (analyzer == null) {
String msg = String.format("getAnalyzerById('%s') returned null!", opts.getQueryAnalyzerId());
LOG.error(msg);
}
}
else if (field == null) {
analyzer = config.getAnalyzer(qname);
} else {
analyzer = config.getAnalyzer(field);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,14 @@ public class QueryOptions {
public static final String OPTION_LOWERCASE_EXPANDED_TERMS = "lowercase-expanded-terms";
public static final String OPTION_FACETS = "facets";
public static final String OPTION_FIELDS = "fields";
public static final String OPTION_QUERY_ANALYZER_ID = "query-analyzer-id";

/** Values accepted by the {@code defaultOperator} option field. */
protected enum DefaultOperator { OR, AND }

protected String queryAnalyzerId = null;
protected DefaultOperator defaultOperator = DefaultOperator.AND;
protected boolean allowLeadingWildcard = false;
protected Optional<Integer> phraseSlop = Optional.empty();
Expand Down Expand Up @@ -151,6 +153,8 @@ private void set(String key, String value) throws XPathException {
case OPTION_LOWERCASE_EXPANDED_TERMS:
lowercaseExpandedTerms = value.equalsIgnoreCase("yes");
break;
case OPTION_QUERY_ANALYZER_ID:
    queryAnalyzerId = value;
    // Explicit break: do not fall through into the default arm.
    break;
default:
    // unknown option, ignore
    break;
Expand Down Expand Up @@ -181,4 +185,6 @@ public void configureParser(CommonQueryParserConfiguration parser) {
parser.setLowercaseExpandedTerms(lowercaseExpandedTerms);
}
}

/**
 * Returns the analyzer id stored in the {@code queryAnalyzerId} field.
 *
 * @return the stored analyzer id; may be null
 */
public String getQueryAnalyzerId() {
    return queryAnalyzerId;
}
}
103 changes: 101 additions & 2 deletions extensions/indexes/lucene/src/test/xquery/lucene/analyzers.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,18 @@
<!-- Lucene index is configured below -->
<lucene>
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<analyzer id="ws" class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
<analyzer id="keyword" class="org.apache.lucene.analysis.core.KeywordAnalyzer"/>
<analyzer id="de" class="org.apache.lucene.analysis.de.GermanAnalyzer"/>
<text field="line" qname="l"/>
<text qname="l" analyzer="de"/>
<analyzer id="de-nostop" class="org.apache.lucene.analysis.de.GermanAnalyzer">
<param name="stopwords" type="org.apache.lucene.analysis.util.CharArraySet"></param>
</analyzer>

<text qname="l" field="line" />
<text qname="l" analyzer="de">
<field name="l_ws" analyzer="ws"/>
<field name="l_nostop" analyzer="de-nostop"/>
</text>
<text field="lineno" qname="@n" analyzer="keyword"/>
</lucene>
</index>
Expand Down Expand Up @@ -71,11 +79,102 @@
<l n="l1.12">Das will mir schier das Herz verbrennen.</l>
</expected>
</test>
<test output="xml">
<task>&lt;query-analyzer-id&gt;de&lt;...</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., 'herzen',
&lt;options&gt;&lt;query-analyzer-id&gt;de&lt;/query-analyzer-id&gt;&lt;/options&gt;
)]
</code>
<expected>
<l n="l1.12">Das will mir schier das Herz verbrennen.</l>
</expected>
</test>
<test output="xml">
<task>query-analyzer-id:de</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., 'herzen', map { "query-analyzer-id": "de" })]</code>
<expected>
<l n="l1.12">Das will mir schier das Herz verbrennen.</l>
</expected>
</test>
<test output="xml">
<task>&lt;query-analyzer-id&gt;keyword&lt;...</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., 'herzen',
&lt;options&gt;&lt;query-analyzer-id&gt;keyword&lt;/query-analyzer-id&gt;&lt;/options&gt;
)]
</code>
<expected/>
</test>
<test output="xml">
<task>query-analyzer-id:keyword</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., 'herzen', map { "query-analyzer-id": "keyword" })]</code>
<expected/>
</test>
<test output="xml">
<task>query new field "l_nostop" with the GermanAnalyzer without stopwords</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., 'l_nostop:(ich OR bin)')]</code>
<expected>
<l n="l1.5">Da steh ich nun, ich armer Tor!</l>
<l n="l1.6">Und bin so klug als wie zuvor;</l>
</expected>
</test>
<test output="xml">
<task>query new field "l_nostop" with the default GermanAnalyzer</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., 'l_nostop:(ich OR bin)', map { "query-analyzer-id": "de" })]</code>
<expected />
</test>
<test output="xml">
<task>query new field "l_ws" with the WhitespaceAnalyzer - no result</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., "l_ws:(nun\!)")]</code>
<expected />
</test>
<test output="xml">
<task>query new field "l_ws" with GermanAnalyzer</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., "l_ws:(nun\!)", map { "query-analyzer-id": "de" })]</code>
<expected />
</test>
<test output="xml">
<task>query new field "l_ws" with the WhitespaceAnalyzer - 2 results</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., "l_ws:(nun,)")]</code>
<expected>
<l n="l1.1">Habe nun, ach! Philosophie,</l>
<l n="l1.5">Da steh ich nun, ich armer Tor!</l>
</expected>
</test>
<test output="xml">
<task>query new field "l_ws" with the GermanAnalyzer - still no results</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., "l_ws:(nun,)", map { "query-analyzer-id": "de" })]</code>
<expected />
</test>
<test output="xml">
<task>query new field "l_ws" with the GermanAnalyzer and wildcard - 2 results</task>
<code>doc("/db/lucene/text.xml")//l[ft:query(., "l_ws:(nun*)", map { "query-analyzer-id": "de" })]</code>
<expected>
<l n="l1.1">Habe nun, ach! Philosophie,</l>
<l n="l1.5">Da steh ich nun, ich armer Tor!</l>
</expected>
</test>
<test output="xml">
<task>Query field with standard analyzer, no match</task>
<code>ft:query-field("line", "herzen")</code>
<expected/>
</test>
<test output="xml">
<task>Query field with query analyzer overridden (options as map)</task>
<code>ft:query-field("line", "herzen", map { "query-analyzer-id": "de" })</code>
<expected>
<l n="l1.12">Das will mir schier das Herz verbrennen.</l>
</expected>
</test>
<test output="xml">
<task>Query field with query analyzer overridden (options as xml)</task>
<code>ft:query-field("line", "herzen",
&lt;options&gt;&lt;query-analyzer-id&gt;de&lt;/query-analyzer-id&gt;&lt;/options&gt;
)
</code>
<expected>
<l n="l1.12">Das will mir schier das Herz verbrennen.</l>
</expected>
</test>
<test output="xml">
<task>Query field with standard analyzer and without context</task>
<code>ft:query-field("line", 'klug')</code>
Expand Down