public Query constructQuery()

in indexer-core/src/main/java/org/apache/maven/index/DefaultQueryCreator.java [160:319]


    /**
     * Builds a Lucene query for {@code query} against one concrete indexer field.
     *
     * <p>Behaviour by search type:
     * <ul>
     * <li>{@code SearchType.EXACT}: a keyword field yields a {@link TermQuery}, or a {@link WildcardQuery} when the
     * query contains {@code *} or {@code ?}. A non-keyword field is only supported for
     * {@code JarFileContentsIndexCreator.FLD_CLASSNAMES_KW}, where the query is lower-cased, dots are turned into
     * slashes and a leading slash is ensured; any other non-keyword field logs a warning and yields
     * {@code null}.</li>
     * <li>{@code SearchType.SCORED}: {@code JarFileContentsIndexCreator.FLD_CLASSNAMES} and other non-keyword fields
     * are parsed with a {@link QueryParser} (AND as default operator); keyword fields yield a wildcard query, or a
     * term query OR-ed with a lower-boosted prefix query.</li>
     * <li>Any other (or {@code null}) search type yields {@code null}.</li>
     * </ul>
     *
     * <p>All lower-casing is done with {@code Locale.ROOT} so the produced terms do not depend on the JVM default
     * locale.
     *
     * @param field the field being searched; used for the warning messages and not otherwise consulted here
     * @param indexerField the indexer field to query; {@code null} or a non-indexed field logs a warning and
     *        yields {@code null}
     * @param query the user supplied query text; if it equals {@code Field.NOT_PRESENT} a match-all wildcard
     *        query on the field is returned
     * @param type the kind of search requested
     * @return the constructed query, or {@code null} when no query can be built for the given combination
     * @throws ParseException if the query parser rejects the (pre-processed) query text; there is no fallback
     */
    public Query constructQuery(
            final Field field, final IndexerField indexerField, final String query, final SearchType type)
            throws ParseException {
        if (indexerField == null) {
            getLogger()
                    .warn("Querying for field \"" + field + "\" without any indexer field was tried. "
                            + "Please review your code, and consider adding this field to index!");

            return null;
        }
        if (!indexerField.isIndexed()) {
            getLogger()
                    .warn("Querying for non-indexed field " + field
                            + " was tried. Please review your code or consider adding this field to index!");

            return null;
        }

        final String key = indexerField.getKey();

        if (Field.NOT_PRESENT.equals(query)) {
            return new WildcardQuery(new Term(key, "*"));
        }

        if (SearchType.EXACT.equals(type)) {
            if (indexerField.isKeyword()) {
                // no tokenization should happen against the field!
                if (query.contains("*") || query.contains("?")) {
                    return new WildcardQuery(new Term(key, query));
                }
                // exactly what the caller wants
                return new TermQuery(new Term(key, query));
            }

            if (indexerField.isStored()) {
                // TODO: resolve this better! Decouple QueryCreator and IndexCreators!
                // This is a hack/workaround here
                if (JarFileContentsIndexCreator.FLD_CLASSNAMES_KW.equals(indexerField)) {
                    // java.util.Locale is fully qualified so that no new import is required
                    final String path =
                            query.toLowerCase(java.util.Locale.ROOT).replace('.', '/');
                    return new TermQuery(new Term(key, query.startsWith("/") ? path : "/" + path));
                }

                getLogger()
                        .warn(type.toString()
                                + " type of querying for non-keyword (but stored) field "
                                + indexerField.getOntology()
                                + " was tried. Please review your code, or indexCreator involved, "
                                + "since this type of querying of this field is currently unsupported.");

                // will never succeed (unless we supply him "filter" too, but that would kill performance)
                // and is possible with stored fields only
                return null;
            }

            getLogger()
                    .warn(type.toString()
                            + " type of querying for non-keyword (and not stored) field "
                            + indexerField.getOntology()
                            + " was tried. Please review your code, or indexCreator involved, "
                            + "since this type of querying of this field is impossible.");

            // not a keyword indexerField, nor stored. No hope at all. Impossible even with "filtering"
            return null;
        }

        if (SearchType.SCORED.equals(type)) {
            if (JarFileContentsIndexCreator.FLD_CLASSNAMES.equals(indexerField)) {
                final String classQuery = query.toLowerCase(java.util.Locale.ROOT)
                        .replace('.', ' ')
                        .replace('/', ' ');
                // tokenization should happen against the field!
                final QueryParser classParser = new QueryParser(key, new NexusAnalyzer());
                classParser.setDefaultOperator(Operator.AND);
                return classParser.parse(classQuery);
            }

            if (indexerField.isKeyword()) {
                // no tokenization should happen against the field!
                if (query.contains("*") || query.contains("?")) {
                    return new WildcardQuery(new Term(key, query));
                }
                // exact term, plus a slightly lower scored prefix match
                final Term term = new Term(key, query);
                return new BooleanQuery.Builder()
                        .add(new TermQuery(term), Occur.SHOULD)
                        .add(new BoostQuery(new PrefixQuery(term), 0.8f), Occur.SHOULD)
                        .build();
            }

            // tokenization should happen against the field!
            final QueryParser qp = new QueryParser(key, new NexusAnalyzer());
            qp.setDefaultOperator(Operator.AND);

            // "query" is left untouched; only this copy is pre-processed for the parser
            String qpQuery = query;

            // small cheap trick
            // if a query is not "expert" (does not contain field:val kind of expression)
            // but it contains star and/or punctuation chars, example: "common-log*"
            // since Lucene does not support multi-terms WITH wildcards.
            // So, here, we "mimic" NexusAnalyzer (this should be fixed!): lower-case and split on punctuation,
            // leaving "*" in place.
            if (qpQuery.matches(".*(\\.|-|_|/).*")) {
                // NOTE(review): a lone "*" between two tokens is removed together with both surrounding spaces,
                // which glues the neighbouring tokens together - confirm this is intended.
                qpQuery = qpQuery.toLowerCase(java.util.Locale.ROOT)
                        .replaceAll("[.\\-_/]", " ")
                        .replaceAll(" \\* ", "")
                        .replaceAll("^\\* ", "")
                        .replaceAll(" \\*$", "");
            }

            // "fix" it with trailing "*" if not there, but only if it not ends with a space
            if (!qpQuery.endsWith("*") && !qpQuery.endsWith(" ")) {
                qpQuery += "*";
            }

            // NOTE: a ParseException from the parser propagates to the caller; there is no "legacy" fallback
            final BooleanQuery.Builder tokenized =
                    new BooleanQuery.Builder().add(qp.parse(qpQuery), Occur.SHOULD);

            if (qpQuery.contains(" ")) {
                // additionally try the whole processed text as a phrase
                tokenized.add(qp.parse("\"" + qpQuery + "\""), Occur.SHOULD);
            }

            Query keywordQuery = null;

            // try with KW only if the ORIGINAL query has no spaces but still analyzes to more than one term
            if (!query.contains(" ") && countTerms(indexerField, query) > 1) {
                // get the KW field
                final IndexerField keywordField = selectIndexerField(indexerField.getOntology(), SearchType.EXACT);

                if (keywordField != null && keywordField.isKeyword()) {
                    keywordQuery = constructQuery(indexerField.getOntology(), keywordField, query, type);
                }
            }

            if (keywordQuery == null) {
                return tokenized.build();
            }

            return new BooleanQuery.Builder()
                    // trick with order
                    .add(keywordQuery, Occur.SHOULD)
                    .add(tokenized.build(), Occur.SHOULD)
                    .build();
        }

        // what search type is this?
        return null;
    }