python.source/src/org/netbeans/modules/python/source/PythonParser.java
author Julien Enselme <jenselme@netbeans.org>
Tue, 27 Jun 2017 21:26:18 +0200
changeset 18429 517409415907
parent 18392 6ccd27d3f884
permissions -rw-r--r--
#251705: UnicodeDecodeError pop-up while entering \x code
     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 1997-2010 Oracle and/or its affiliates. All rights reserved.
     5  *
     6  * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
     7  * Other names may be trademarks of their respective owners.
     8  *
     9  * The contents of this file are subject to the terms of either the GNU
    10  * General Public License Version 2 only ("GPL") or the Common
    11  * Development and Distribution License("CDDL") (collectively, the
    12  * "License"). You may not use this file except in compliance with the
    13  * License. You can obtain a copy of the License at
    14  * http://www.netbeans.org/cddl-gplv2.html
    15  * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
    16  * specific language governing permissions and limitations under the
    17  * License.  When distributing the software, include this License Header
    18  * Notice in each file and include the License file at
    19  * nbbuild/licenses/CDDL-GPL-2-CP.  Oracle designates this
    20  * particular file as subject to the "Classpath" exception as provided
    21  * by Oracle in the GPL Version 2 section of the License file that
    22  * accompanied this code. If applicable, add the following below the
    23  * License Header, with the fields enclosed by brackets [] replaced by
    24  * your own identifying information:
    25  * "Portions Copyrighted [year] [name of copyright owner]"
    26  *
    27  * Contributor(s):
    28  *
    29  * Portions Copyrighted 2007 Sun Microsystems, Inc.
    30  */
    31 package org.netbeans.modules.python.source;
    32 
    33 import java.io.InputStream;
    34 import java.io.InputStreamReader;
    35 import java.util.ArrayList;
    36 import java.util.List;
    37 import java.util.logging.Level;
    38 import java.util.logging.Logger;
    39 import javax.swing.event.ChangeListener;
    40 import javax.swing.text.BadLocationException;
    41 import org.netbeans.modules.csl.api.Severity;
    42 import org.netbeans.modules.csl.spi.DefaultError;
    43 import org.netbeans.modules.csl.api.Error;
    44 import org.netbeans.modules.csl.api.OffsetRange;
    45 import org.netbeans.modules.csl.spi.GsfUtilities;
    46 import org.netbeans.modules.parsing.api.Snapshot;
    47 import org.netbeans.modules.parsing.api.Task;
    48 import org.netbeans.modules.parsing.spi.Parser;
    49 import org.netbeans.modules.parsing.spi.SourceModificationEvent;
    50 import org.netbeans.modules.python.api.PythonFileEncodingQuery;
    51 import org.openide.filesystems.FileObject;
    52 import org.python.antlr.runtime.ANTLRStringStream;
    53 import org.python.antlr.runtime.BaseRecognizer;
    54 import org.python.antlr.runtime.BitSet;
    55 import org.python.antlr.runtime.CommonToken;
    56 import org.python.antlr.runtime.CommonTokenStream;
    57 import org.python.antlr.runtime.IntStream;
    58 import org.python.antlr.runtime.Lexer;
    59 import org.python.antlr.runtime.MismatchedTokenException;
    60 import org.python.antlr.runtime.RecognitionException;
    61 
    62 import org.openide.filesystems.FileUtil;
    63 import org.openide.util.Exceptions;
    64 import org.python.antlr.ListErrorHandler;
    65 import org.python.antlr.ParseException;
    66 import org.python.antlr.PythonLexer;
    67 import org.python.antlr.PythonTokenSource;
    68 import org.python.antlr.PythonTree;
    69 import org.python.antlr.PythonTreeAdaptor;
    70 import org.python.antlr.base.expr;
    71 import org.python.antlr.base.mod;
    72 import org.python.antlr.base.slice;
    73 import org.python.antlr.base.stmt;
    74 import org.python.antlr.runtime.ANTLRReaderStream;
    75 import org.python.antlr.runtime.CharStream;
    76 import org.python.core.PyException;
    77 
    78 /**
    79  * Parser for Python. Wraps Jython.
    80  * 
    81  * @author Frank Wierzbicki
    82  * @author Tor Norbye
    83  */
    84 public class PythonParser extends Parser {
    85     /** For unit tests such that they can make sure we didn't have a parser abort */
    86     static Throwable runtimeException;
    87 
    88     static {
    89         org.python.core.PySystemState.initialize();
    90     }
    91     
    92     private Result lastResult;
    93     private final PythonFileEncodingQuery fileEncodingQuery = new PythonFileEncodingQuery();
    94     private String headerCached = null;
    95     private String encodingCache = null;
    96 
    97     public mod file_input(CharStream charStream, String fileName) throws RecognitionException {
    98         ListErrorHandler eh = new ListErrorHandler();
    99         mod tree = null;
   100         PythonLexer lexer = new PythonLexer(charStream);
   101         lexer.setErrorHandler(eh);
   102         CommonTokenStream tokens = new CommonTokenStream(lexer);
   103         tokens.discardOffChannelTokens(true);
   104         PythonTokenSource indentedSource = new PythonTokenSource(tokens, fileName);
   105         tokens = new CommonTokenStream(indentedSource);
   106         org.python.antlr.PythonParser parser = new org.python.antlr.PythonParser(tokens);
   107         parser.setTreeAdaptor(new PythonTreeAdaptor());
   108         parser.setErrorHandler(eh);
   109         org.python.antlr.PythonParser.file_input_return r = parser.file_input();
   110         tree = (mod)r.getTree();
   111         return tree;
   112     }
   113 
   114     @Override
   115     public void addChangeListener(ChangeListener changeListener) {}
   116 
   117     @Override
   118     public void removeChangeListener(ChangeListener changeListener) {}
   119     
   120     public PythonTree parse(InputStream istream, String fileName) throws Exception {
   121         InputStreamReader reader = new InputStreamReader(istream, "ISO-8859-1");
   122         return file_input(new ANTLRReaderStream(reader), fileName);
   123     }
   124     
   125     @Override
   126     public final Result getResult(Task task) throws org.netbeans.modules.parsing.spi.ParseException {
   127         return lastResult;
   128     }
   129     
   130     private static final Logger LOG = Logger.getLogger(PythonParser.class.getName());
   131 
   132     @Override
   133     public void parse(Snapshot snapshot, Task task, SourceModificationEvent event) throws org.netbeans.modules.parsing.spi.ParseException {
   134         Context context = new Context();
   135         context.snapshot = snapshot;
   136         context.event = event;
   137         context.task = task;
   138         context.caretOffset = GsfUtilities.getLastKnownCaretOffset(snapshot, event);
   139         context.source = snapshot.getText().toString();
   140         context.file = snapshot.getSource().getFileObject();
   141         if(context.file == null) {
   142             return; // TODO: parse the source, not the file
   143         }
   144         /* Let's not sanitize ;-) Would be great if we could have a more robust parser
   145         if (context.caretOffset != -1) {
   146             context.sanitized = Sanitize.EDITED_DOT;
   147         }
   148         */
   149         lastResult = parse(context, context.sanitized);
   150     }
   151     public PythonParserResult parse(final Context context, Sanitize sanitizing) {
   152         boolean sanitizedSource = false;
   153         String sourceCode = context.source;
   154         if (!((sanitizing == Sanitize.NONE) || (sanitizing == Sanitize.NEVER))) {
   155             boolean ok = sanitizeSource(context, sanitizing);
   156 
   157             if (ok) {
   158                 assert context.sanitizedSource != null;
   159                 sanitizedSource = true;
   160                 sourceCode = context.sanitizedSource;
   161             } else {
   162                 // Try next trick
   163                 return sanitize(context, sanitizing);
   164             }
   165         }
   166         final String source = sourceCode;
   167 
   168         if (sanitizing == Sanitize.NONE) {
   169             context.errorOffset = -1;
   170         }
   171 
   172         final List<Error> errors = new ArrayList<>();
   173         final FileObject file = context.file;
   174         try {
   175             String fileName = file.getNameExt();
   176             // TODO - sniff file headers etc. Frank's comment:
   177             // Longer term for Python compatibility, having NetBeans sniff the top two lines
   178             // for an encoding would be the right thing to do from a pure Python
   179             // compatibility standard (see http://www.python.org/dev/peps/pep-0263/) I
   180             // have pep-0263 code in Jython that I could probably extract for this
   181             // purpose down the road.
   182             //String charset = "ISO8859_1"; // NOI18N
   183             //String charset = "UTF-8"; // NOI18N
   184             //String charset = "iso8859_1"; // NOI18N
   185             // TODO: improve this check.
   186             int cache_len = sourceCode.length() >= 64 ? 64 : sourceCode.length();
   187             if (headerCached == null || cache_len != headerCached.length() || !headerCached.equals(sourceCode.substring(0, cache_len))) {
   188                 headerCached = sourceCode.substring(0, cache_len);
   189                 encodingCache = fileEncodingQuery.getPythonFileEncoding(sourceCode.split("\n", 2));                
   190             }
   191             String charset = encodingCache;            
   192                 
   193             final boolean ignoreErrors = sanitizedSource;
   194             ListErrorHandler errorHandler = new ListErrorHandler() {
   195                 @Override
   196                 public void error(String message, PythonTree t) {
   197                     errors.add(new DefaultError(null, message, null, file, t.getCharStartIndex(), t.getCharStopIndex(), Severity.ERROR));
   198                     super.error(message, t);
   199                 }
   200 
   201                 @Override
   202                 public expr errorExpr(PythonTree t) {
   203                     return super.errorExpr(t);
   204                 }
   205 
   206                 @Override
   207                 public mod errorMod(PythonTree t) {
   208                     return super.errorMod(t);
   209                 }
   210 
   211                 @Override
   212                 public slice errorSlice(PythonTree t) {
   213                     return super.errorSlice(t);
   214                 }
   215 
   216                 @Override
   217                 public stmt errorStmt(PythonTree t) {
   218                     return super.errorStmt(t);
   219                 }
   220 
   221                 @Override
   222                 public boolean mismatch(BaseRecognizer br, IntStream input, int ttype, BitSet follow) {
   223                     return super.mismatch(br, input, ttype, follow);
   224                 }
   225 
   226                 @Override
   227                 public Object recoverFromMismatchedToken(BaseRecognizer br, IntStream input, int ttype, BitSet follow) {
   228                     MismatchedTokenException mt = new MismatchedTokenException(ttype, input);
   229                     String message = br.getErrorMessage(mt, br.getTokenNames());
   230                     if (mt.line >= 1) {
   231                         int lineOffset = findLineOffset(context.source, mt.line-1);
   232                         if (mt.charPositionInLine > 0) {
   233                             lineOffset += mt.charPositionInLine;
   234                         }
   235                         int start = lineOffset;//t.getCharStartIndex();
   236                         int stop = lineOffset;//t.getCharStopIndex();
   237                         errors.add(new DefaultError(null, message, null, file, start, stop, Severity.ERROR));
   238                     }
   239                     return super.recoverFromMismatchedToken(br, input, ttype, follow);
   240                 }
   241 
   242                 @Override
   243                 public void recover(Lexer lex, RecognitionException re) {
   244                     super.recover(lex, re);
   245                 }
   246 
   247                 @Override
   248                 public void recover(BaseRecognizer br, IntStream input, RecognitionException re) {
   249                     super.recover(br, input, re);
   250                 }
   251 
   252                 @Override
   253                 public void reportError(BaseRecognizer br, RecognitionException re) {
   254                     if (!ignoreErrors) {
   255                         String message = br.getErrorMessage(re, br.getTokenNames());
   256                         if (message == null || message.length() == 0) {
   257                             message = re.getMessage();
   258                         }
   259                         if (message == null) {
   260                             //message = re.getUnexpectedType();
   261                             message = re.toString();
   262                         }
   263                         int start = re.index;
   264 
   265                         // Try to find the line offset. re.index doesn't do the trick.
   266                         start = PythonUtils.getOffsetByLineCol(source, re.line - 1, 0); // -1: 0-based
   267                         int end = start;
   268                         if (re.charPositionInLine > 0) {
   269                             try {
   270                                 end = GsfUtilities.getRowLastNonWhite(source, start) + 1;
   271                                 start += re.charPositionInLine;
   272                                 if (end < start) {
   273                                     end = start;
   274                                 }
   275                             } catch (BadLocationException ex) {
   276                                 Exceptions.printStackTrace(ex);
   277                                 end = start;
   278                             }
   279                             if (end == 0) {
   280                                 end = start;
   281                             }
   282                         }
   283 
   284                         // Some errors have better offsets if we look at the token stream
   285                         if (re instanceof MismatchedTokenException) {
   286                             MismatchedTokenException m = (MismatchedTokenException)re;
   287                             if (m.token != null) {
   288                                 if (m.token instanceof org.python.antlr.runtime.CommonToken) {
   289                                     CommonToken token = (org.python.antlr.runtime.CommonToken)m.token;
   290                                     start = token.getStartIndex();
   291                                     end = token.getStopIndex();
   292                                 }
   293                             }
   294                         }
   295 
   296                         if (start > source.length()) {
   297                             start = source.length();
   298                             end = start;
   299                         }
   300 
   301                         errors.add(new DefaultError(null, message, null, file, start, end, Severity.ERROR));
   302 
   303                         // In order to avoid a StackOverflowError, the BaseRecognizer must be recreated.
   304                         // We must keep the names of the tokens to avoid a NullPointerException.
   305                         // See bz252630
   306                         final String[] tokenNames = br.getTokenNames();
   307                         br = new BaseRecognizer() {
   308 
   309                             @Override
   310                             public String getSourceName() {
   311                                 return file.getName();
   312                             }
   313 
   314                             @Override
   315                             public String[] getTokenNames() {
   316                                 return tokenNames;
   317                             }
   318                         };
   319 
   320                         super.reportError(br, re);
   321                     }
   322                 }
   323             };
   324 
   325             PythonLexer lexer = new PythonLexer(new ANTLRStringStream(sourceCode));
   326             lexer.setErrorHandler(errorHandler);
   327             CommonTokenStream tokens = new CommonTokenStream(lexer);
   328             tokens.discardOffChannelTokens(true);
   329             PythonTokenSource indentedSource = new PythonTokenSource(tokens, fileName);
   330             CommonTokenStream indentedTokens = new CommonTokenStream(indentedSource);
   331             // Import line ending with a dot raise a NullPointerException in
   332             // org.python.antlr.GrammarActions.makeDottedText called from parser.file_input
   333             // sanitizeImportTokens will remove the dot token from the list of tokens in
   334             // indentedTokens to avoid the bug and add an error at this file.
   335             // See https://netbeans.org/bugzilla/show_bug.cgi?id=252356
   336             sanitizeImportTokens(indentedTokens, errors, file);
   337             org.python.antlr.PythonParser parser;
   338             if (charset != null) {
   339                 parser = new org.python.antlr.PythonParser(indentedTokens, charset);
   340             } else {
   341                 parser = new org.python.antlr.PythonParser(indentedTokens);
   342             }
   343             parser.setTreeAdaptor(new PythonTreeAdaptor());
   344             parser.setErrorHandler(errorHandler);
   345             org.python.antlr.PythonParser.file_input_return r = parser.file_input();
   346             PythonTree t = (PythonTree)r.getTree();
   347             PythonParserResult result = new PythonParserResult(t, context.snapshot);
   348             result.setErrors(errors);
   349 
   350             result.setSanitized(context.sanitized, context.sanitizedRange, context.sanitizedContents);
   351             result.setSource(sourceCode);
   352 
   353             return result;
   354         } catch (ParseException pe) {
   355             if (sanitizing == Sanitize.NONE) {
   356                 PythonParserResult sanitizedResult = sanitize(context, sanitizing);
   357                 if (sanitizedResult.isValid()) {
   358                     return sanitizedResult;
   359                 } else {
   360                     int offset = pe.index;
   361                     assert offset >= 0;
   362                     String desc = pe.getLocalizedMessage();
   363                     if (desc == null) {
   364                         desc = pe.getMessage();
   365                     }
   366                     DefaultError error = new DefaultError(null /*key*/, desc, null, file, offset, offset, Severity.ERROR);
   367                     PythonParserResult parserResult = new PythonParserResult(null, context.snapshot);
   368                     parserResult.addError(error);
   369                     for (Error e : errors) {
   370                         parserResult.addError(e);
   371                     }
   372 
   373                     return parserResult;
   374                 }
   375             } else {
   376                 return sanitize(context, sanitizing);
   377             }
   378 
   379         } catch (PyException e) {
   380             // This is issue 251705
   381             Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
   382             return new PythonParserResult(null, context.snapshot);
   383         } catch (IllegalArgumentException e) {
   384             Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
   385             return new PythonParserResult(null, context.snapshot);
   386         } catch (NullPointerException e) {
   387             String fileName = "";
   388             if (file != null) {
   389                 fileName = FileUtil.getFileDisplayName(file);
   390             }
   391             e = Exceptions.attachMessage(e, "Was parsing " + fileName);
   392             Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
   393             return new PythonParserResult(null, context.snapshot);
   394         } catch (Throwable t) {
   395             runtimeException = t;
   396             StackTraceElement[] stackTrace = t.getStackTrace();
   397             if (stackTrace != null && stackTrace.length > 0 && stackTrace[0].getClassName().startsWith("org.python.antlr")) {//.runtime.tree.RewriteRuleElementStream")) {
   398                 // This is issue 150921
   399                 // Don't bug user about it -- we already know
   400                 Logger.getLogger(this.getClass().getName()).log(Level.FINE, "Encountered issue #150921", t);
   401             } else {
   402                 t = Exceptions.attachMessage(t, "Was parsing " + FileUtil.getFileDisplayName(file));
   403                 Exceptions.printStackTrace(t);
   404             }
   405             return new PythonParserResult(null, context.snapshot);
   406         }
   407     }
   408 
   409     private void sanitizeImportTokens(CommonTokenStream indentedTokens, List errors, FileObject file) {
   410         List tokens = indentedTokens.getTokens();
   411         List<CommonToken> tokensToRemove = new ArrayList<>();
   412         int i = 0;
   413         while (i < tokens.size()) {
   414             CommonToken importToken = (CommonToken)tokens.get(i);
   415             if ("import".equals(importToken.getText()) || "from".equals(importToken.getText())) {
   416                 // sanitizeDotTokens return the index of the token that starts the next line
   417                 i = sanitizeDotTokens(tokens, tokensToRemove, importToken, i + 1, errors, file);
   418             } else {
   419                 i++;
   420             }
   421         }
   422 
   423         for (CommonToken token : tokensToRemove) {
   424             tokens.remove(token);
   425         }
   426     }
   427 
   428     private int sanitizeDotTokens(List tokens, List tokensToRemove, CommonToken importToken,
   429             int startIndex, List errors, FileObject file) {
   430         for (int j = startIndex; j < tokens.size() - 1; j++) {
   431             CommonToken dotToken = (CommonToken)tokens.get(j);
   432             CommonToken nextToken = (CommonToken)tokens.get(j + 1);
   433             if (".".equals(dotToken.getText())) {
   434                 if (nextToken.getText().startsWith("\n")) {
   435                     tokensToRemove.add(dotToken);
   436                     String rawTokenText;
   437                     if (nextToken.getText().startsWith("\n")) {
   438                         rawTokenText = "\\n";
   439                     } else {
   440                         rawTokenText = " ";
   441                     }
   442                     errors.add(
   443                         new DefaultError(null, "Mismatch input '.' expecting NAME\nMissing NAME at '" + rawTokenText + "'",
   444                             null, file, importToken.getStartIndex(), dotToken.getStopIndex(), Severity.ERROR));
   445                 }
   446             } else if ("\n".equals(nextToken.getText())) { // End of line, must continue looping from external loop
   447                 return j + 1;
   448             }
   449         }
   450 
   451         return startIndex;
   452     }
   453 
   454     private static String asString(CharSequence sequence) {
   455         if (sequence instanceof String) {
   456             return (String)sequence;
   457         } else {
   458             return sequence.toString();
   459         }
   460     }
   461 
   462 
   463     @SuppressWarnings("fallthrough")
   464     private PythonParserResult sanitize(final Context context, final Sanitize sanitizing) {
   465 
   466         switch (sanitizing) {
   467         case NEVER:
   468             return new PythonParserResult(null, context.snapshot);
   469 
   470         case NONE:
   471             if (context.caretOffset != -1) {
   472                 return parse(context, Sanitize.EDITED_DOT);
   473             }
   474 
   475         case EDITED_DOT:
   476             // We've tried removing whitespace around the edit location
   477             // Fall through to try parsing with removing stuff around error location
   478             // (Don't bother doing this if errorOffset==caretOffset since that would try the same
   479             // source as EDITED_DOT which has no better chance of succeeding...)
   480             if (context.errorOffset != -1 && context.errorOffset != context.caretOffset) {
   481                 return parse(context, Sanitize.ERROR_DOT);
   482             }
   483 
   484         // Fall through to try the next trick
   485         case ERROR_DOT:
   486 
   487             // We've tried removing dots - now try removing the whole line at the error position
   488             if (context.errorOffset != -1) {
   489                 return parse(context, Sanitize.ERROR_LINE);
   490             }
   491 
   492         // Fall through to try the next trick
   493         case ERROR_LINE:
   494 
   495             // Messing with the error line didn't work - we could try "around" the error line
   496             // but I'm not attempting that now.
   497             // Finally try removing the whole line around the user editing position
   498             // (which could be far from where the error is showing up - but if you're typing
   499             // say a new "def" statement in a class, this will show up as an error on a mismatched
   500             // "end" statement rather than here
   501             if (context.caretOffset != -1) {
   502                 return parse(context, Sanitize.EDITED_LINE);
   503             }
   504 
   505         // Fall through for default handling
   506         case EDITED_LINE:
   507         default:
   508             // We're out of tricks - just return the failed parse result
   509             return new PythonParserResult(null, context.snapshot);
   510         }
   511     }
   512 
   513     /**
   514      * Try cleaning up the source buffer around the current offset to increase
   515      * likelihood of parse success. Initially this method had a lot of
   516      * logic to determine whether a parse was likely to fail (e.g. invoking
   517      * the isEndMissing method from bracket completion etc.).
   518      * However, I am now trying a parse with the real source first, and then
   519      * only if that fails do I try parsing with sanitized source. Therefore,
   520      * this method has to be less conservative in ripping out code since it
   521      * will only be used when the regular source is failing.
   522      *
   523      * @todo Automatically close current statement by inserting ";"
   524      * @todo Handle sanitizing "new ^" from parse errors
   525      * @todo Replace "end" insertion fix with "}" insertion
   526      */
   527     private boolean sanitizeSource(Context context, Sanitize sanitizing) {
   528         int offset = context.caretOffset;
   529 
   530         // Let caretOffset represent the offset of the portion of the buffer we'll be operating on
   531         if ((sanitizing == Sanitize.ERROR_DOT) || (sanitizing == Sanitize.ERROR_LINE)) {
   532             offset = context.errorOffset;
   533         }
   534 
   535         // Don't attempt cleaning up the source if we don't have the buffer position we need
   536         if (offset == -1) {
   537             return false;
   538         }
   539 
   540         // The user might be editing around the given caretOffset.
   541         // See if it looks modified
   542         // Insert an end statement? Insert a } marker?
   543         String doc = context.source;
   544         if (offset > doc.length()) {
   545             return false;
   546         }
   547 
   548         try {
   549             // Sometimes the offset shows up on the next line
   550             if (GsfUtilities.isRowEmpty(doc, offset) || GsfUtilities.isRowWhite(doc, offset)) {
   551                 offset = GsfUtilities.getRowStart(doc, offset) - 1;
   552                 if (offset < 0) {
   553                     offset = 0;
   554                 }
   555             }
   556 
   557             if (!(GsfUtilities.isRowEmpty(doc, offset) || GsfUtilities.isRowWhite(doc, offset))) {
   558                 if ((sanitizing == Sanitize.EDITED_LINE) || (sanitizing == Sanitize.ERROR_LINE)) {
   559                     // See if I should try to remove the current line, since it has text on it.
   560                     int lineEnd = GsfUtilities.getRowLastNonWhite(doc, offset);
   561 
   562                     if (lineEnd != -1) {
   563                         lineEnd++; // lineEnd is exclusive, not inclusive
   564                         StringBuilder sb = new StringBuilder(doc.length());
   565                         int lineStart = GsfUtilities.getRowStart(doc, offset);
   566                         if (lineEnd >= lineStart + 2) {
   567                             sb.append(doc.substring(0, lineStart));
   568                             sb.append("//");
   569                             int rest = lineStart + 2;
   570                             if (rest < doc.length()) {
   571                                 sb.append(doc.substring(rest, doc.length()));
   572                             }
   573                         } else {
   574                             // A line with just one character - can't replace with a comment
   575                             // Just replace the char with a space
   576                             sb.append(doc.substring(0, lineStart));
   577                             sb.append(" ");
   578                             int rest = lineStart + 1;
   579                             if (rest < doc.length()) {
   580                                 sb.append(doc.substring(rest, doc.length()));
   581                             }
   582 
   583                         }
   584 
   585                         assert sb.length() == doc.length();
   586 
   587                         context.sanitizedRange = new OffsetRange(lineStart, lineEnd);
   588                         context.sanitizedSource = sb.toString();
   589                         context.sanitizedContents = doc.substring(lineStart, lineEnd);
   590                         return true;
   591                     }
   592                 } else {
   593                     assert sanitizing == Sanitize.ERROR_DOT || sanitizing == Sanitize.EDITED_DOT;
   594                     // Try nuking dots/colons from this line
   595                     // See if I should try to remove the current line, since it has text on it.
   596                     int lineStart = GsfUtilities.getRowStart(doc, offset);
   597                     int lineEnd = offset - 1;
   598                     while (lineEnd >= lineStart && lineEnd < doc.length()) {
   599                         if (!Character.isWhitespace(doc.charAt(lineEnd))) {
   600                             break;
   601                         }
   602                         lineEnd--;
   603                     }
   604                     if (lineEnd > lineStart) {
   605                         StringBuilder sb = new StringBuilder(doc.length());
   606                         String line = doc.substring(lineStart, lineEnd + 1);
   607                         int removeChars = 0;
   608                         int removeEnd = lineEnd + 1;
   609                         boolean isLineEnd = GsfUtilities.getRowLastNonWhite(context.source, lineEnd) <= lineEnd;
   610 
   611                         if (line.endsWith(".")) { // NOI18N
   612                             removeChars = 1;
   613                         } else if (line.endsWith("(")) { // NOI18N
   614                             if (isLineEnd) {
   615                                 removeChars = 1;
   616                             }
   617                         } else if (line.endsWith(",")) { // NOI18N                            removeChars = 1;
   618                             if (!isLineEnd) {
   619                                 removeChars = 1;
   620                             }
   621                         } else if (line.endsWith(", ")) { // NOI18N
   622                             if (!isLineEnd) {
   623                                 removeChars = 2;
   624                             }
   625                         } else if (line.endsWith(",)")) { // NOI18N
   626                             // Handle lone comma in parameter list - e.g.
   627                             // type "foo(a," -> you end up with "foo(a,|)" which doesn't parse - but
   628                             // the line ends with ")", not "," !
   629                             // Just remove the comma
   630                             removeChars = 1;
   631                             removeEnd--;
   632                         } else if (line.endsWith(", )")) { // NOI18N
   633                             // Just remove the comma
   634                             removeChars = 1;
   635                             removeEnd -= 2;
   636                         } else if (line.endsWith(" def") && isLineEnd) { // NOI18N
   637                             removeChars = 3;
   638                         } else {
   639 //                            // Make sure the line doesn't end with one of the JavaScript keywords
   640 //                            // (new, do, etc) - we can't handle that!
   641 //                            for (String keyword : PythonUtils.PYTHON_KEYWORDS) { // reserved words are okay
   642 //                                if (line.endsWith(keyword)) {
   643 //                                    if ("print".equals(keyword)) { // NOI18N
   644 //                                        // Only remove the keyword if it's the end of the line. Otherwise,
   645 //                                        // it could have just been typed in front of something (e.g. inserted a print) and we don't
   646 //                                        // want to confuse the parser with "va foo" instead of "var foo"
   647 //                                        if (!isLineEnd) {
   648 //                                            continue;
   649 //                                        }
   650 //                                    }
   651 //                                    removeChars = 1;
   652 //                                    break;
   653 //                                }
   654 //                            }
   655                         }
   656 
   657                         if (removeChars == 0) {
   658                             return false;
   659                         }
   660 
   661                         int removeStart = removeEnd - removeChars;
   662 
   663                         sb.append(doc.substring(0, removeStart));
   664 
   665                         for (int i = 0; i < removeChars; i++) {
   666                             sb.append(' ');
   667                         }
   668 
   669                         if (removeEnd < doc.length()) {
   670                             sb.append(doc.substring(removeEnd, doc.length()));
   671                         }
   672                         assert sb.length() == doc.length();
   673 
   674                         context.sanitizedRange = new OffsetRange(removeStart, removeEnd);
   675                         context.sanitizedSource = sb.toString();
   676                         context.sanitizedContents = doc.substring(removeStart, removeEnd);
   677                         return true;
   678                     }
   679                 }
   680             }
   681         } catch (BadLocationException ble) {
   682             Exceptions.printStackTrace(ble);
   683         }
   684 
   685         return false;
   686     }
   687 
   688     private static int findLineOffset(String source, int line) {
   689         int offset = -1;
   690         for (int i = 0; i < line; i++) {
   691             offset = source.indexOf("\n", offset+1);
   692             if (offset == -1) {
   693                 return source.length();
   694             }
   695         }
   696 
   697         return Math.min(source.length(), offset+1);
   698     }
   699 
   700     /** Attempts to sanitize the input buffer */
   701     public static enum Sanitize {
   702         /** Only parse the current file accurately, don't try heuristics */
   703         NEVER,
   704         /** Perform no sanitization */
   705         NONE,
   706         /** Try to remove the trailing . or :: at the caret line */
   707         EDITED_DOT,
   708         /** Try to remove the trailing . or :: at the error position, or the prior
   709          * line, or the caret line */
   710         ERROR_DOT,
   711         /** Try to cut out the error line */
   712         ERROR_LINE,
   713         /** Try to cut out the current edited line, if known */
   714         EDITED_LINE,
   715     }
   716 
   717     /** Sanitize context */
   718     public static class Context {
   719         private FileObject file;
   720 //        private ParseListener listener;
   721         private int errorOffset;
   722         private String source;
   723         private String sanitizedSource;
   724         private OffsetRange sanitizedRange = OffsetRange.NONE;
   725         private String sanitizedContents;
   726         private int caretOffset;
   727         private Sanitize sanitized = Sanitize.NONE;
   728 //        private TranslatedSource translatedSource;
   729 //        private Parser.Job job;
   730         private Snapshot snapshot;
   731         private Task task;
   732         private SourceModificationEvent event;
   733 //
   734 //        public Context(ParserFile parserFile, ParseListener listener, String source, int caretOffset, TranslatedSource translatedSource, Parser.Job job) {
   735 //            this.file = parserFile;
   736 //            this.listener = listener;
   737 //            this.source = source;
   738 //            this.caretOffset = caretOffset;
   739 //            this.translatedSource = translatedSource;
   740 //            this.job = job;
   741 //
   742 //
   743 //            if (caretOffset != -1) {
   744 //                sanitized = Sanitize.EDITED_DOT;
   745 //            }
   746 //        }
   747 //
   748 //        @Override
   749 //        public String toString() {
   750 //            return "PythonParser.Context(" + file.toString() + ")"; // NOI18N
   751 //        }
   752 //
   753 //        public OffsetRange getSanitizedRange() {
   754 //            return sanitizedRange;
   755 //        }
   756 //
   757 //        public Sanitize getSanitized() {
   758 //            return sanitized;
   759 //        }
   760 //
   761 //        public String getSanitizedSource() {
   762 //            return sanitizedSource;
   763 //        }
   764 //
   765 //        public int getErrorOffset() {
   766 //            return errorOffset;
   767 //        }
   768     }
   769 }