Making IndexQuery usable in web context: using parsing.lucene's Index, creating a query method to be consumed by the web frontend.
author Jan Lahoda <jlahoda@netbeans.org>
Mon, 16 Jul 2012 16:54:54 +0200
changeset 832 0aef2b581506
parent 831 81dc1afaf5fa
child 833 63bae9907b7a
Making IndexQuery usable in web context: using parsing.lucene's Index, creating a query method to be consumed by the web frontend.
language/ide/indexing/nbproject/genfiles.properties
language/ide/indexing/nbproject/project.xml
language/ide/indexing/src/org/netbeans/modules/jackpot30/indexing/index/IndexQuery.java
     1.1 --- a/language/ide/indexing/nbproject/genfiles.properties	Mon Jul 16 16:16:38 2012 +0200
     1.2 +++ b/language/ide/indexing/nbproject/genfiles.properties	Mon Jul 16 16:54:54 2012 +0200
     1.3 @@ -3,6 +3,6 @@
     1.4  build.xml.stylesheet.CRC32=a56c6a5b@2.49
     1.5  # This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
     1.6  # Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
     1.7 -nbproject/build-impl.xml.data.CRC32=dd88bcdd
     1.8 +nbproject/build-impl.xml.data.CRC32=51835c0d
     1.9  nbproject/build-impl.xml.script.CRC32=d49586b3
    1.10 -nbproject/build-impl.xml.stylesheet.CRC32=238281d1@2.49
    1.11 +nbproject/build-impl.xml.stylesheet.CRC32=238281d1@2.52
     2.1 --- a/language/ide/indexing/nbproject/project.xml	Mon Jul 16 16:16:38 2012 +0200
     2.2 +++ b/language/ide/indexing/nbproject/project.xml	Mon Jul 16 16:54:54 2012 +0200
     2.3 @@ -214,6 +214,15 @@
     2.4                      </run-dependency>
     2.5                  </dependency>
     2.6                  <dependency>
     2.7 +                    <code-name-base>org.netbeans.modules.parsing.lucene</code-name-base>
     2.8 +                    <build-prerequisite/>
     2.9 +                    <compile-dependency/>
    2.10 +                    <run-dependency>
    2.11 +                        <release-version>2</release-version>
    2.12 +                        <implementation-version/>
    2.13 +                    </run-dependency>
    2.14 +                </dependency>
    2.15 +                <dependency>
    2.16                      <code-name-base>org.netbeans.modules.projectapi</code-name-base>
    2.17                      <build-prerequisite/>
    2.18                      <compile-dependency/>
     3.1 --- a/language/ide/indexing/src/org/netbeans/modules/jackpot30/indexing/index/IndexQuery.java	Mon Jul 16 16:16:38 2012 +0200
     3.2 +++ b/language/ide/indexing/src/org/netbeans/modules/jackpot30/indexing/index/IndexQuery.java	Mon Jul 16 16:54:54 2012 +0200
     3.3 @@ -46,36 +46,34 @@
     3.4  import java.net.URISyntaxException;
     3.5  import java.net.URL;
     3.6  import java.util.ArrayList;
     3.7 -import java.util.BitSet;
     3.8  import java.util.Collection;
     3.9  import java.util.Collections;
    3.10  import java.util.HashMap;
    3.11  import java.util.List;
    3.12  import java.util.Map;
    3.13  import java.util.zip.DataFormatException;
    3.14 +import org.apache.lucene.analysis.KeywordAnalyzer;
    3.15  import org.apache.lucene.document.CompressionTools;
    3.16  import org.apache.lucene.document.Document;
    3.17  import org.apache.lucene.document.FieldSelector;
    3.18  import org.apache.lucene.document.FieldSelectorResult;
    3.19 -import org.apache.lucene.index.IndexReader;
    3.20  import org.apache.lucene.index.Term;
    3.21  import org.apache.lucene.queryParser.ParseException;
    3.22  import org.apache.lucene.search.BooleanClause;
    3.23  import org.apache.lucene.search.BooleanQuery;
    3.24 -import org.apache.lucene.search.Collector;
    3.25 -import org.apache.lucene.search.IndexSearcher;
    3.26  import org.apache.lucene.search.PhraseQuery;
    3.27  import org.apache.lucene.search.Query;
    3.28 -import org.apache.lucene.search.Searcher;
    3.29  import org.apache.lucene.search.TermQuery;
    3.30 -import org.apache.lucene.store.FSDirectory;
    3.31  import org.netbeans.api.annotations.common.NullAllowed;
    3.32 -import org.netbeans.modules.jackpot30.common.api.LuceneHelpers.BitSetCollector;
    3.33  import org.netbeans.modules.jackpot30.remoting.api.RemoteIndex;
    3.34  import org.netbeans.modules.jackpot30.remoting.api.WebUtilities;
    3.35  import org.netbeans.modules.java.hints.providers.spi.HintDescription.AdditionalQueryConstraints;
    3.36  import org.netbeans.modules.java.hints.spiimpl.pm.BulkSearch;
    3.37  import org.netbeans.modules.java.hints.spiimpl.pm.BulkSearch.BulkPattern;
    3.38 +import org.netbeans.modules.parsing.lucene.support.Convertor;
    3.39 +import org.netbeans.modules.parsing.lucene.support.Index;
    3.40 +import org.netbeans.modules.parsing.lucene.support.Index.Status;
    3.41 +import org.netbeans.modules.parsing.lucene.support.IndexManager;
    3.42  import org.openide.filesystems.FileObject;
    3.43  import org.openide.filesystems.FileUtil;
    3.44  import org.openide.util.Exceptions;
    3.45 @@ -89,7 +87,93 @@
    3.46      public abstract Collection<? extends String> findCandidates(BulkPattern pattern) throws IOException;
    3.47  
    3.48      public abstract Map<String, Map<String, Integer>> findCandidatesWithFrequencies(BulkPattern pattern) throws IOException;
    3.49 -    
    3.50 +
    3.51 +    public static Map<String, Map<String, Integer>> performLocalQuery(Index index, final BulkPattern pattern, final boolean withFrequencies) throws IOException, InterruptedException, ParseException {
    3.52 +        final Map<String, Map<String, Integer>> result = new HashMap<String, Map<String, Integer>>();
    3.53 +
    3.54 +        index.query(new ArrayList<Object>(), new Convertor<Document, Object>() {
    3.55 +            @Override public Object convert(Document doc) {
    3.56 +                try {
    3.57 +                    ByteArrayInputStream in = new ByteArrayInputStream(CompressionTools.decompress(doc.getField("encoded").getBinaryValue()));
    3.58 +
    3.59 +                    try {
    3.60 +                        Map<String, Integer> freqs;
    3.61 +                        boolean matches;
    3.62 +
    3.63 +                        if (withFrequencies) {
    3.64 +                            freqs = BulkSearch.getDefault().matchesWithFrequencies(in, pattern);
    3.65 +                            matches = !freqs.isEmpty();
    3.66 +                        } else {
    3.67 +                            freqs = null;
    3.68 +                            matches = BulkSearch.getDefault().matches(in, pattern);
    3.69 +                        }
    3.70 +
    3.71 +                        if (matches) {
    3.72 +                            result.put(doc.getField("path").stringValue(), freqs);
    3.73 +                        }
    3.74 +                    } finally {
    3.75 +                        in.close();
    3.76 +                    }
    3.77 +                } catch (DataFormatException ex) {
    3.78 +                    throw new IllegalStateException(ex);
    3.79 +                } catch (IOException ex) {
    3.80 +                    throw new IllegalStateException(ex);
    3.81 +                }
    3.82 +
    3.83 +                return null;
    3.84 +            }
    3.85 +        }, new FieldSelector() {
    3.86 +            public FieldSelectorResult accept(String string) {
    3.87 +                return "encoded".equals(string) || "path".equals(string) ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
    3.88 +            }
    3.89 +        }, null, query(pattern));
    3.90 +
    3.91 +        return result;
    3.92 +    }
    3.93 +
    3.94 +    private static Query query(BulkPattern pattern) throws ParseException {
    3.95 +        BooleanQuery result = new BooleanQuery();
    3.96 +
    3.97 +        for (int cntr = 0; cntr < pattern.getIdentifiers().size(); cntr++) {
    3.98 +            assert !pattern.getRequiredContent().get(cntr).isEmpty();
    3.99 +
   3.100 +            BooleanQuery emb = new BooleanQuery();
   3.101 +
   3.102 +            for (List<String> c : pattern.getRequiredContent().get(cntr)) {
   3.103 +                if (c.isEmpty()) continue;
   3.104 +
   3.105 +                PhraseQuery pq = new PhraseQuery();
   3.106 +
   3.107 +                for (String s : c) {
   3.108 +                    pq.add(new Term("content", s));
   3.109 +                }
   3.110 +
   3.111 +                emb.add(pq, BooleanClause.Occur.MUST);
   3.112 +            }
   3.113 +
   3.114 +            AdditionalQueryConstraints additionalConstraints = pattern.getAdditionalConstraints().get(cntr);
   3.115 +
   3.116 +            if (additionalConstraints != null && !additionalConstraints.requiredErasedTypes.isEmpty()) {
   3.117 +                BooleanQuery constraintsQuery = new BooleanQuery();
   3.118 +
   3.119 +                constraintsQuery.add(new TermQuery(new Term("attributed", "false")), BooleanClause.Occur.SHOULD);
   3.120 +
   3.121 +                BooleanQuery constr = new BooleanQuery();
   3.122 +
   3.123 +                for (String tc : additionalConstraints.requiredErasedTypes) {
   3.124 +                    constr.add(new TermQuery(new Term("erasedTypes", tc)), BooleanClause.Occur.MUST);
   3.125 +                }
   3.126 +
   3.127 +                constraintsQuery.add(constr, BooleanClause.Occur.SHOULD);
   3.128 +                emb.add(constraintsQuery, BooleanClause.Occur.MUST);
   3.129 +            }
   3.130 +
   3.131 +            result.add(emb, BooleanClause.Occur.SHOULD);
   3.132 +        }
   3.133 +
   3.134 +        return result;
   3.135 +    }
   3.136 +
   3.137      private static final class LocalIndexQuery extends IndexQuery {
   3.138          private final @NullAllowed File cacheDir;
   3.139  
   3.140 @@ -106,107 +190,23 @@
   3.141          }
   3.142  
   3.143          private Map<String, Map<String, Integer>> findCandidates(BulkPattern pattern, boolean withFrequencies) throws IOException {
   3.144 -            IndexReader reader = cacheDir != null ? IndexReader.open(FSDirectory.open(cacheDir)) : null;
   3.145 +            Index index = IndexManager.createIndex(cacheDir, new KeywordAnalyzer());
   3.146  
   3.147 -            if (reader == null) {
   3.148 +            if (index.getStatus(true) != Status.VALID) {
   3.149                   return Collections.emptyMap();
   3.150              }
   3.151  
   3.152              try {
   3.153 -            Searcher s = new IndexSearcher(reader);
   3.154 -            BitSet matchingDocuments = new BitSet(reader.maxDoc());
   3.155 -            Collector c = new BitSetCollector(matchingDocuments);
   3.156 -
   3.157 -            try {
   3.158 -                s.search(query(pattern), c);
   3.159 +                return performLocalQuery(index, pattern, withFrequencies);
   3.160 +            } catch (InterruptedException ex) {
   3.161 +                throw new IOException(ex);
   3.162              } catch (ParseException ex) {
   3.163                  throw new IOException(ex);
   3.164 -            }
   3.165 -
   3.166 -            Map<String, Map<String, Integer>> result = new HashMap<String, Map<String, Integer>>();
   3.167 -
   3.168 -            for (int docNum = matchingDocuments.nextSetBit(0); docNum >= 0; docNum = matchingDocuments.nextSetBit(docNum+1)) {
   3.169 -                try {
   3.170 -                    final Document doc = reader.document(docNum, new FieldSelector() {
   3.171 -                        public FieldSelectorResult accept(String string) {
   3.172 -                            return "encoded".equals(string) || "path".equals(string) ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
   3.173 -                        }
   3.174 -                    });
   3.175 -
   3.176 -                    ByteArrayInputStream in = new ByteArrayInputStream(CompressionTools.decompress(doc.getField("encoded").getBinaryValue()));
   3.177 -
   3.178 -                    try {
   3.179 -                        Map<String, Integer> freqs;
   3.180 -                        boolean matches;
   3.181 -
   3.182 -                        if (withFrequencies) {
   3.183 -                            freqs = BulkSearch.getDefault().matchesWithFrequencies(in, pattern);
   3.184 -                            matches = !freqs.isEmpty();
   3.185 -                        } else {
   3.186 -                            freqs = null;
   3.187 -                            matches = BulkSearch.getDefault().matches(in, pattern);
   3.188 -                        }
   3.189 -
   3.190 -                        if (matches) {
   3.191 -                            result.put(doc.getField("path").stringValue(), freqs);
   3.192 -                            continue;
   3.193 -                        }
   3.194 -                    } finally {
   3.195 -                        in.close();
   3.196 -                    }
   3.197 -                } catch (DataFormatException ex) {
   3.198 -                    throw new IOException(ex);
   3.199 -                }
   3.200 -            }
   3.201 -
   3.202 -            return result;
   3.203              } finally {
   3.204 -                reader.close();
   3.205 +                index.close();
   3.206              }
   3.207          }
   3.208  
   3.209 -        private Query query(BulkPattern pattern) throws ParseException {
   3.210 -            BooleanQuery result = new BooleanQuery();
   3.211 -
   3.212 -            for (int cntr = 0; cntr < pattern.getIdentifiers().size(); cntr++) {
   3.213 -                assert !pattern.getRequiredContent().get(cntr).isEmpty();
   3.214 -
   3.215 -                BooleanQuery emb = new BooleanQuery();
   3.216 -
   3.217 -                for (List<String> c : pattern.getRequiredContent().get(cntr)) {
   3.218 -                    if (c.isEmpty()) continue;
   3.219 -
   3.220 -                    PhraseQuery pq = new PhraseQuery();
   3.221 -
   3.222 -                    for (String s : c) {
   3.223 -                        pq.add(new Term("content", s));
   3.224 -                    }
   3.225 -
   3.226 -                    emb.add(pq, BooleanClause.Occur.MUST);
   3.227 -                }
   3.228 -
   3.229 -                AdditionalQueryConstraints additionalConstraints = pattern.getAdditionalConstraints().get(cntr);
   3.230 -
   3.231 -                if (additionalConstraints != null && !additionalConstraints.requiredErasedTypes.isEmpty()) {
   3.232 -                    BooleanQuery constraintsQuery = new BooleanQuery();
   3.233 -
   3.234 -                    constraintsQuery.add(new TermQuery(new Term("attributed", "false")), BooleanClause.Occur.SHOULD);
   3.235 -
   3.236 -                    BooleanQuery constr = new BooleanQuery();
   3.237 -
   3.238 -                    for (String tc : additionalConstraints.requiredErasedTypes) {
   3.239 -                        constr.add(new TermQuery(new Term("erasedTypes", tc)), BooleanClause.Occur.MUST);
   3.240 -                    }
   3.241 -
   3.242 -                    constraintsQuery.add(constr, BooleanClause.Occur.SHOULD);
   3.243 -                    emb.add(constraintsQuery, BooleanClause.Occur.MUST);
   3.244 -                }
   3.245 -
   3.246 -                result.add(emb, BooleanClause.Occur.SHOULD);
   3.247 -            }
   3.248 -
   3.249 -            return result;
   3.250 -        }
   3.251      }
   3.252      
   3.253      private static final class RemoteIndexQuery extends IndexQuery {