Generating index statistics, source indexer should be using the correct encoding now.
author Jan Lahoda <jlahoda@netbeans.org>
Fri, 17 Aug 2012 14:53:41 +0200
changeset 852 14b2dae1089d
parent 851 6e261fe869d7
child 853 8d169fc210ea
Generating index statistics, source indexer should be using the correct encoding now.
remoting/server/indexer/impl/src/org/netbeans/modules/jackpot30/backend/impl/OptionProcessorImpl.java
remoting/server/indexer/impl/src/org/netbeans/modules/jackpot30/backend/impl/spi/StatisticsGenerator.java
remoting/server/indexer/source/nbproject/project.xml
remoting/server/indexer/source/src/org/netbeans/modules/jackpot30/indexer/source/SourceIndexer.java
remoting/server/indexer/source/src/org/netbeans/modules/jackpot30/indexer/source/SourceStatisticsGenerator.java
remoting/server/indexer/usages/src/org/netbeans/modules/jackpot30/indexer/usages/UsagesStatisticsGenerator.java
     1.1 --- a/remoting/server/indexer/impl/src/org/netbeans/modules/jackpot30/backend/impl/OptionProcessorImpl.java	Thu Aug 16 00:08:29 2012 +0200
     1.2 +++ b/remoting/server/indexer/impl/src/org/netbeans/modules/jackpot30/backend/impl/OptionProcessorImpl.java	Fri Aug 17 14:53:41 2012 +0200
     1.3 @@ -46,10 +46,11 @@
     1.4  import java.io.FileOutputStream;
     1.5  import java.io.IOException;
     1.6  import java.io.InputStream;
     1.7 -import java.net.URL;
     1.8  import java.util.Arrays;
     1.9 +import java.util.Collections;
    1.10  import java.util.HashSet;
    1.11  import java.util.Map;
    1.12 +import java.util.Map.Entry;
    1.13  import java.util.Properties;
    1.14  import java.util.Set;
    1.15  import java.util.jar.JarOutputStream;
    1.16 @@ -58,6 +59,7 @@
    1.17  import java.util.zip.ZipEntry;
    1.18  import org.apache.lucene.analysis.KeywordAnalyzer;
    1.19  import org.apache.lucene.index.CorruptIndexException;
    1.20 +import org.apache.lucene.index.IndexReader;
    1.21  import org.apache.lucene.index.IndexWriter;
    1.22  import org.apache.lucene.store.FSDirectory;
    1.23  import org.netbeans.api.java.classpath.ClassPath;
    1.24 @@ -70,6 +72,7 @@
    1.25  import org.netbeans.api.project.ui.OpenProjects;
    1.26  import org.netbeans.api.sendopts.CommandException;
    1.27  import org.netbeans.modules.jackpot30.backend.impl.spi.IndexAccessor;
    1.28 +import org.netbeans.modules.jackpot30.backend.impl.spi.StatisticsGenerator;
    1.29  import org.netbeans.modules.parsing.impl.indexing.CacheFolder;
    1.30  import org.netbeans.spi.java.classpath.support.ClassPathSupport;
    1.31  import org.netbeans.spi.project.support.ant.PropertyUtils;
    1.32 @@ -174,6 +177,27 @@
    1.33              }
    1.34          }
    1.35  
    1.36 +        Map<String, Long> statistics = Collections.emptyMap();
    1.37 +        IndexReader r = null;
    1.38 +
    1.39 +        try {
    1.40 +            r = IndexReader.open(FSDirectory.open(FileUtil.toFile(cacheTemp)), true);
    1.41 +
    1.42 +            statistics = StatisticsGenerator.generateStatistics(r);
    1.43 +        } catch (CorruptIndexException ex) {
    1.44 +            Exceptions.printStackTrace(ex);
    1.45 +        } catch (IOException ex) {
    1.46 +            Exceptions.printStackTrace(ex);
    1.47 +        } finally {
    1.48 +            if (r != null) {
    1.49 +                try {
    1.50 +                    r.close();
    1.51 +                } catch (IOException ex) {
    1.52 +                    Exceptions.printStackTrace(ex);
    1.53 +                }
    1.54 +            }
    1.55 +        }
    1.56 +
    1.57          JarOutputStream out = null;
    1.58          InputStream segments = null;
    1.59  
    1.60 @@ -221,6 +245,14 @@
    1.61                      out.write((",\n\"" + infoValue.substring(0, eqSign) + "\": \"" + infoValue.substring(eqSign + 1) + "\"").getBytes("UTF-8"));
    1.62                  }
    1.63              }
    1.64 +            out.write(",\n \"statistics\" : {\n".getBytes("UTF-8"));
    1.65 +            boolean wasEntry = false;
    1.66 +            for (Entry<String, Long> e : statistics.entrySet()) {
    1.67 +                if (wasEntry) out.write(", \n".getBytes("UTF-8"));
    1.68 +                out.write(("\"" + e.getKey() + "\" : " + e.getValue()).getBytes("UTF-8"));
    1.69 +                wasEntry = true;
    1.70 +            }
    1.71 +            out.write("\n}\n".getBytes("UTF-8"));
    1.72              out.write("\n}\n".getBytes("UTF-8"));
    1.73  
    1.74              for (FileObject s : cacheFolder.getChildren()) {
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/remoting/server/indexer/impl/src/org/netbeans/modules/jackpot30/backend/impl/spi/StatisticsGenerator.java	Fri Aug 17 14:53:41 2012 +0200
     2.3 @@ -0,0 +1,102 @@
     2.4 +/*
     2.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     2.6 + *
     2.7 + * Copyright 2012 Oracle and/or its affiliates. All rights reserved.
     2.8 + *
     2.9 + * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
    2.10 + * Other names may be trademarks of their respective owners.
    2.11 + *
    2.12 + * The contents of this file are subject to the terms of either the GNU
    2.13 + * General Public License Version 2 only ("GPL") or the Common
    2.14 + * Development and Distribution License("CDDL") (collectively, the
    2.15 + * "License"). You may not use this file except in compliance with the
    2.16 + * License. You can obtain a copy of the License at
    2.17 + * http://www.netbeans.org/cddl-gplv2.html
    2.18 + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
    2.19 + * specific language governing permissions and limitations under the
    2.20 + * License.  When distributing the software, include this License Header
    2.21 + * Notice in each file and include the License file at
    2.22 + * nbbuild/licenses/CDDL-GPL-2-CP.  Oracle designates this
    2.23 + * particular file as subject to the "Classpath" exception as provided
    2.24 + * by Oracle in the GPL Version 2 section of the License file that
    2.25 + * accompanied this code. If applicable, add the following below the
    2.26 + * License Header, with the fields enclosed by brackets [] replaced by
    2.27 + * your own identifying information:
    2.28 + * "Portions Copyrighted [year] [name of copyright owner]"
    2.29 + *
    2.30 + * If you wish your version of this file to be governed by only the CDDL
    2.31 + * or only the GPL Version 2, indicate your decision by adding
    2.32 + * "[Contributor] elects to include this software in this distribution
    2.33 + * under the [CDDL or GPL Version 2] license." If you do not indicate a
    2.34 + * single choice of license, a recipient has the option to distribute
    2.35 + * your version of this file under either the CDDL, the GPL Version 2 or
    2.36 + * to extend the choice of license to its licensees as provided above.
    2.37 + * However, if you add GPL Version 2 code and therefore, elected the GPL
    2.38 + * Version 2 license, then the option applies only if the new code is
    2.39 + * made subject to such option by the copyright holder.
    2.40 + *
    2.41 + * Contributor(s):
    2.42 + *
    2.43 + * Portions Copyrighted 2012 Sun Microsystems, Inc.
    2.44 + */
    2.45 +package org.netbeans.modules.jackpot30.backend.impl.spi;
    2.46 +
    2.47 +import java.io.IOException;
    2.48 +import java.util.Collection;
    2.49 +import java.util.HashMap;
    2.50 +import java.util.Map;
    2.51 +import org.apache.lucene.document.Document;
    2.52 +import org.apache.lucene.document.FieldSelector;
    2.53 +import org.apache.lucene.document.FieldSelectorResult;
    2.54 +import org.apache.lucene.index.IndexReader;
    2.55 +import org.openide.util.Lookup;
    2.56 +
    2.57 +/**
    2.58 + *
    2.59 + * @author lahvac
    2.60 + */
    2.61 +public abstract class StatisticsGenerator {
    2.62 +
    2.63 +    protected abstract void amendStatistics(IndexReader r, Document doc) throws IOException;
    2.64 +
    2.65 +    public static Map<String, Long> generateStatistics(IndexReader r) throws IOException {
    2.66 +        statistics = new HashMap<String, Long>();
    2.67 +
    2.68 +        Collection<? extends StatisticsGenerator> generators = Lookup.getDefault().lookupAll(StatisticsGenerator.class);
    2.69 +
    2.70 +        int maxDocs = r.maxDoc();
    2.71 +
    2.72 +        for (int d = 0; d < maxDocs; d++) {
    2.73 +            Document doc = r.document(d, new FieldSelector() {
    2.74 +                @Override public FieldSelectorResult accept(String string) {
    2.75 +                    return FieldSelectorResult.LAZY_LOAD;
    2.76 +                }
    2.77 +            });
    2.78 +
    2.79 +            for (StatisticsGenerator sg : generators) {
    2.80 +                sg.amendStatistics(r, doc);
    2.81 +            }
    2.82 +        }
    2.83 +        
    2.84 +        Map<String, Long> result = statistics;
    2.85 +
    2.86 +        statistics = null;
    2.87 +
    2.88 +        return result;
    2.89 +    }
    2.90 +
    2.91 +    private static Map<String, Long> statistics;
    2.92 +
    2.93 +    protected final void increment(String key) {
    2.94 +        add(key, 1);
    2.95 +    }
    2.96 +
    2.97 +    protected void add(String key, long count) {
    2.98 +        Long val = statistics.get(key);
    2.99 +
   2.100 +        if (val == null) val = 0L;
   2.101 +
   2.102 +        statistics.put(key, val + count);
   2.103 +    }
   2.104 +
   2.105 +}
     3.1 --- a/remoting/server/indexer/source/nbproject/project.xml	Thu Aug 16 00:08:29 2012 +0200
     3.2 +++ b/remoting/server/indexer/source/nbproject/project.xml	Fri Aug 17 14:53:41 2012 +0200
     3.3 @@ -42,6 +42,15 @@
     3.4                      </run-dependency>
     3.5                  </dependency>
     3.6                  <dependency>
     3.7 +                    <code-name-base>org.netbeans.modules.queries</code-name-base>
     3.8 +                    <build-prerequisite/>
     3.9 +                    <compile-dependency/>
    3.10 +                    <run-dependency>
    3.11 +                        <release-version>1</release-version>
    3.12 +                        <specification-version>1.29</specification-version>
    3.13 +                    </run-dependency>
    3.14 +                </dependency>
    3.15 +                <dependency>
    3.16                      <code-name-base>org.openide.filesystems</code-name-base>
    3.17                      <build-prerequisite/>
    3.18                      <compile-dependency/>
    3.19 @@ -49,6 +58,14 @@
    3.20                          <specification-version>7.48</specification-version>
    3.21                      </run-dependency>
    3.22                  </dependency>
    3.23 +                <dependency>
    3.24 +                    <code-name-base>org.openide.util.lookup</code-name-base>
    3.25 +                    <build-prerequisite/>
    3.26 +                    <compile-dependency/>
    3.27 +                    <run-dependency>
    3.28 +                        <specification-version>8.16</specification-version>
    3.29 +                    </run-dependency>
    3.30 +                </dependency>
    3.31              </module-dependencies>
    3.32              <public-packages/>
    3.33          </data>
     4.1 --- a/remoting/server/indexer/source/src/org/netbeans/modules/jackpot30/indexer/source/SourceIndexer.java	Thu Aug 16 00:08:29 2012 +0200
     4.2 +++ b/remoting/server/indexer/source/src/org/netbeans/modules/jackpot30/indexer/source/SourceIndexer.java	Fri Aug 17 14:53:41 2012 +0200
     4.3 @@ -45,7 +45,7 @@
     4.4  import java.io.IOException;
     4.5  import java.io.InputStreamReader;
     4.6  import java.io.Reader;
     4.7 -import java.net.URL;
     4.8 +import java.nio.charset.Charset;
     4.9  import java.util.logging.Level;
    4.10  import java.util.logging.Logger;
    4.11  import org.apache.lucene.document.CompressionTools;
    4.12 @@ -54,11 +54,14 @@
    4.13  import org.apache.lucene.document.Field.Index;
    4.14  import org.apache.lucene.document.Field.Store;
    4.15  import org.netbeans.api.editor.mimelookup.MimeRegistration;
    4.16 +import org.netbeans.api.queries.FileEncodingQuery;
    4.17  import org.netbeans.modules.jackpot30.backend.impl.spi.IndexAccessor;
    4.18  import org.netbeans.modules.parsing.spi.indexing.Context;
    4.19  import org.netbeans.modules.parsing.spi.indexing.CustomIndexer;
    4.20  import org.netbeans.modules.parsing.spi.indexing.CustomIndexerFactory;
    4.21  import org.netbeans.modules.parsing.spi.indexing.Indexable;
    4.22 +import org.openide.filesystems.FileObject;
    4.23 +import org.openide.filesystems.URLMapper;
    4.24  
    4.25  /**
    4.26   *
    4.27 @@ -76,11 +79,20 @@
    4.28                  String relPath = IndexAccessor.getCurrent().getPath(i.getURL());
    4.29  
    4.30                  if (relPath == null) continue;
    4.31 +
    4.32 +                FileObject file = URLMapper.findFileObject(i.getURL());
    4.33 +
    4.34 +                if (file == null) {
    4.35 +                    //TODO: log
    4.36 +                    continue;
    4.37 +                }
    4.38                  
    4.39                  Document doc = new Document();
    4.40  
    4.41                  doc.add(new Field("relativePath", relPath, Store.YES, Index.NOT_ANALYZED));
    4.42 -                doc.add(new Field(KEY_CONTENT, CompressionTools.compressString(readFully(i.getURL())), Store.YES));
    4.43 +                doc.add(new Field(KEY_CONTENT, CompressionTools.compressString(readFully(file)), Store.YES));
    4.44 +                doc.add(new Field("fileMimeType", file.getMIMEType(), Store.YES, Index.NO));
    4.45 +                doc.add(new Field("sizeInBytes", Long.toString(file.getSize()), Store.YES, Index.NO));
    4.46  
    4.47                  IndexAccessor.getCurrent().getIndexWriter().addDocument(doc);
    4.48              }
    4.49 @@ -89,12 +101,13 @@
    4.50          }
    4.51      }
    4.52  
    4.53 -    private static String readFully(URL source) throws IOException {
    4.54 +    private static String readFully(FileObject source) throws IOException {
    4.55          Reader in = null;
    4.56          StringBuilder result = new StringBuilder();
    4.57  
    4.58          try {
    4.59 -            in = new BufferedReader(new InputStreamReader(source.openStream(), "UTF-8"));
    4.60 +            Charset charset = FileEncodingQuery.getEncoding(source);
    4.61 +            in = new BufferedReader(new InputStreamReader(source.getInputStream(), charset));
    4.62  
    4.63              int read;
    4.64  
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/remoting/server/indexer/source/src/org/netbeans/modules/jackpot30/indexer/source/SourceStatisticsGenerator.java	Fri Aug 17 14:53:41 2012 +0200
     5.3 @@ -0,0 +1,77 @@
     5.4 +/*
     5.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     5.6 + *
     5.7 + * Copyright 2012 Oracle and/or its affiliates. All rights reserved.
     5.8 + *
     5.9 + * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
    5.10 + * Other names may be trademarks of their respective owners.
    5.11 + *
    5.12 + * The contents of this file are subject to the terms of either the GNU
    5.13 + * General Public License Version 2 only ("GPL") or the Common
    5.14 + * Development and Distribution License("CDDL") (collectively, the
    5.15 + * "License"). You may not use this file except in compliance with the
    5.16 + * License. You can obtain a copy of the License at
    5.17 + * http://www.netbeans.org/cddl-gplv2.html
    5.18 + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
    5.19 + * specific language governing permissions and limitations under the
    5.20 + * License.  When distributing the software, include this License Header
    5.21 + * Notice in each file and include the License file at
    5.22 + * nbbuild/licenses/CDDL-GPL-2-CP.  Oracle designates this
    5.23 + * particular file as subject to the "Classpath" exception as provided
    5.24 + * by Oracle in the GPL Version 2 section of the License file that
    5.25 + * accompanied this code. If applicable, add the following below the
    5.26 + * License Header, with the fields enclosed by brackets [] replaced by
    5.27 + * your own identifying information:
    5.28 + * "Portions Copyrighted [year] [name of copyright owner]"
    5.29 + *
    5.30 + * If you wish your version of this file to be governed by only the CDDL
    5.31 + * or only the GPL Version 2, indicate your decision by adding
    5.32 + * "[Contributor] elects to include this software in this distribution
    5.33 + * under the [CDDL or GPL Version 2] license." If you do not indicate a
    5.34 + * single choice of license, a recipient has the option to distribute
    5.35 + * your version of this file under either the CDDL, the GPL Version 2 or
    5.36 + * to extend the choice of license to its licensees as provided above.
    5.37 + * However, if you add GPL Version 2 code and therefore, elected the GPL
    5.38 + * Version 2 license, then the option applies only if the new code is
    5.39 + * made subject to such option by the copyright holder.
    5.40 + *
    5.41 + * Contributor(s):
    5.42 + *
    5.43 + * Portions Copyrighted 2012 Sun Microsystems, Inc.
    5.44 + */
    5.45 +package org.netbeans.modules.jackpot30.indexer.source;
    5.46 +
    5.47 +import java.io.IOException;
    5.48 +import org.apache.lucene.document.Document;
    5.49 +import org.apache.lucene.document.Fieldable;
    5.50 +import org.apache.lucene.index.IndexReader;
    5.51 +import org.netbeans.modules.jackpot30.backend.impl.spi.StatisticsGenerator;
    5.52 +import org.openide.util.lookup.ServiceProvider;
    5.53 +
    5.54 +/**
    5.55 + *
    5.56 + * @author lahvac
    5.57 + */
    5.58 +@ServiceProvider(service=StatisticsGenerator.class)
    5.59 +public class SourceStatisticsGenerator extends StatisticsGenerator {
    5.60 +
    5.61 +    @Override
    5.62 +    protected void amendStatistics(IndexReader r, Document doc) throws IOException {
    5.63 +        if (doc.getFieldable("relativePath") != null) {
    5.64 +            increment("files/total");
    5.65 +            Fieldable mimeType = doc.getFieldable("fileMimeType");
    5.66 +            if (mimeType != null) {
    5.67 +                increment("files/" + mimeType.stringValue());
    5.68 +            }
    5.69 +            Fieldable sizeInBytes = doc.getFieldable("sizeInBytes");
    5.70 +            if (sizeInBytes != null) {
    5.71 +                long size = Long.parseLong(sizeInBytes.stringValue());
    5.72 +                add("fileSize/total", size);
    5.73 +                if (mimeType != null) {
    5.74 +                    add("fileSize/" + mimeType.stringValue(), size);
    5.75 +                }
    5.76 +            }
    5.77 +        }
    5.78 +    }
    5.79 +
    5.80 +}
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/remoting/server/indexer/usages/src/org/netbeans/modules/jackpot30/indexer/usages/UsagesStatisticsGenerator.java	Fri Aug 17 14:53:41 2012 +0200
     6.3 @@ -0,0 +1,72 @@
     6.4 +/*
     6.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     6.6 + *
     6.7 + * Copyright 2012 Oracle and/or its affiliates. All rights reserved.
     6.8 + *
     6.9 + * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
    6.10 + * Other names may be trademarks of their respective owners.
    6.11 + *
    6.12 + * The contents of this file are subject to the terms of either the GNU
    6.13 + * General Public License Version 2 only ("GPL") or the Common
    6.14 + * Development and Distribution License("CDDL") (collectively, the
    6.15 + * "License"). You may not use this file except in compliance with the
    6.16 + * License. You can obtain a copy of the License at
    6.17 + * http://www.netbeans.org/cddl-gplv2.html
    6.18 + * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
    6.19 + * specific language governing permissions and limitations under the
    6.20 + * License.  When distributing the software, include this License Header
    6.21 + * Notice in each file and include the License file at
    6.22 + * nbbuild/licenses/CDDL-GPL-2-CP.  Oracle designates this
    6.23 + * particular file as subject to the "Classpath" exception as provided
    6.24 + * by Oracle in the GPL Version 2 section of the License file that
    6.25 + * accompanied this code. If applicable, add the following below the
    6.26 + * License Header, with the fields enclosed by brackets [] replaced by
    6.27 + * your own identifying information:
    6.28 + * "Portions Copyrighted [year] [name of copyright owner]"
    6.29 + *
    6.30 + * If you wish your version of this file to be governed by only the CDDL
    6.31 + * or only the GPL Version 2, indicate your decision by adding
    6.32 + * "[Contributor] elects to include this software in this distribution
    6.33 + * under the [CDDL or GPL Version 2] license." If you do not indicate a
    6.34 + * single choice of license, a recipient has the option to distribute
    6.35 + * your version of this file under either the CDDL, the GPL Version 2 or
    6.36 + * to extend the choice of license to its licensees as provided above.
    6.37 + * However, if you add GPL Version 2 code and therefore, elected the GPL
    6.38 + * Version 2 license, then the option applies only if the new code is
    6.39 + * made subject to such option by the copyright holder.
    6.40 + *
    6.41 + * Contributor(s):
    6.42 + *
    6.43 + * Portions Copyrighted 2012 Sun Microsystems, Inc.
    6.44 + */
    6.45 +package org.netbeans.modules.jackpot30.indexer.usages;
    6.46 +
    6.47 +import java.io.IOException;
    6.48 +import javax.lang.model.element.ElementKind;
    6.49 +import org.apache.lucene.document.Document;
    6.50 +import org.apache.lucene.index.IndexReader;
    6.51 +import org.netbeans.modules.jackpot30.backend.impl.spi.StatisticsGenerator;
    6.52 +import org.openide.util.lookup.ServiceProvider;
    6.53 +
    6.54 +/**
    6.55 + *
    6.56 + * @author lahvac
    6.57 + */
    6.58 +@ServiceProvider(service=StatisticsGenerator.class)
    6.59 +public class UsagesStatisticsGenerator extends StatisticsGenerator {
    6.60 +
    6.61 +    @Override
    6.62 +    protected void amendStatistics(IndexReader r, Document doc) throws IOException {
    6.63 +        if (doc.getFieldable("classFQN") != null) increment("java-classes");
    6.64 +        else if (doc.getFieldable("featureClassFQN") != null) {
    6.65 +            ElementKind kind = ElementKind.valueOf(doc.getFieldable("featureKind").stringValue());
    6.66 +
    6.67 +            if (kind.isField()) {
    6.68 +                increment("java-fields");
    6.69 +            } else {
    6.70 +                increment("java-methods");
    6.71 +            }
    6.72 +        }
    6.73 +    }
    6.74 +
    6.75 +}