python.source/src/org/netbeans/modules/python/source/PythonParser.java
author Julien Enselme <jenselme@netbeans.org>
Tue, 27 Jun 2017 21:26:18 +0200
changeset 18429 517409415907
parent 18392 6ccd27d3f884
permissions -rw-r--r--
#251705: UnicodeDecodeError pop-up while entering \x code
tor@16862
     1
/*
tor@16862
     2
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
tor@16862
     3
 *
jglick@17225
     4
 * Copyright 1997-2010 Oracle and/or its affiliates. All rights reserved.
jglick@17225
     5
 *
jglick@17225
     6
 * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
jglick@17225
     7
 * Other names may be trademarks of their respective owners.
tor@16862
     8
 *
tor@16862
     9
 * The contents of this file are subject to the terms of either the GNU
tor@16862
    10
 * General Public License Version 2 only ("GPL") or the Common
tor@16862
    11
 * Development and Distribution License("CDDL") (collectively, the
tor@16862
    12
 * "License"). You may not use this file except in compliance with the
tor@16862
    13
 * License. You can obtain a copy of the License at
tor@16862
    14
 * http://www.netbeans.org/cddl-gplv2.html
tor@16862
    15
 * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
tor@16862
    16
 * specific language governing permissions and limitations under the
tor@16862
    17
 * License.  When distributing the software, include this License Header
tor@16862
    18
 * Notice in each file and include the License file at
jglick@17225
    19
 * nbbuild/licenses/CDDL-GPL-2-CP.  Oracle designates this
tor@16862
    20
 * particular file as subject to the "Classpath" exception as provided
jglick@17225
    21
 * by Oracle in the GPL Version 2 section of the License file that
tor@16862
    22
 * accompanied this code. If applicable, add the following below the
tor@16862
    23
 * License Header, with the fields enclosed by brackets [] replaced by
tor@16862
    24
 * your own identifying information:
tor@16862
    25
 * "Portions Copyrighted [year] [name of copyright owner]"
tor@16862
    26
 *
tor@16862
    27
 * Contributor(s):
tor@16862
    28
 *
tor@16862
    29
 * Portions Copyrighted 2007 Sun Microsystems, Inc.
tor@16862
    30
 */
ralphbenjamin@18313
    31
package org.netbeans.modules.python.source;
tor@16862
    32
tor@17137
    33
import java.io.InputStream;
tor@17137
    34
import java.io.InputStreamReader;
tor@16862
    35
import java.util.ArrayList;
tor@16862
    36
import java.util.List;
tor@16862
    37
import java.util.logging.Level;
tor@16862
    38
import java.util.logging.Logger;
juniel_katarn@18215
    39
import javax.swing.event.ChangeListener;
tor@16862
    40
import javax.swing.text.BadLocationException;
juniel_katarn@18215
    41
import org.netbeans.modules.csl.api.Severity;
juniel_katarn@18215
    42
import org.netbeans.modules.csl.spi.DefaultError;
juniel_katarn@18215
    43
import org.netbeans.modules.csl.api.Error;
juniel_katarn@18215
    44
import org.netbeans.modules.csl.api.OffsetRange;
juniel_katarn@18215
    45
import org.netbeans.modules.csl.spi.GsfUtilities;
juniel_katarn@18215
    46
import org.netbeans.modules.parsing.api.Snapshot;
juniel_katarn@18215
    47
import org.netbeans.modules.parsing.api.Task;
juniel_katarn@18215
    48
import org.netbeans.modules.parsing.spi.Parser;
juniel_katarn@18215
    49
import org.netbeans.modules.parsing.spi.SourceModificationEvent;
vincentvdl@18286
    50
import org.netbeans.modules.python.api.PythonFileEncodingQuery;
juniel_katarn@18215
    51
import org.openide.filesystems.FileObject;
tor@17030
    52
import org.python.antlr.runtime.ANTLRStringStream;
tor@17030
    53
import org.python.antlr.runtime.BaseRecognizer;
tor@17030
    54
import org.python.antlr.runtime.BitSet;
tor@17030
    55
import org.python.antlr.runtime.CommonToken;
tor@17137
    56
import org.python.antlr.runtime.CommonTokenStream;
tor@17030
    57
import org.python.antlr.runtime.IntStream;
tor@17030
    58
import org.python.antlr.runtime.Lexer;
tor@17030
    59
import org.python.antlr.runtime.MismatchedTokenException;
tor@17030
    60
import org.python.antlr.runtime.RecognitionException;
tor@16862
    61
tor@16862
    62
import org.openide.filesystems.FileUtil;
tor@16862
    63
import org.openide.util.Exceptions;
tor@16862
    64
import org.python.antlr.ListErrorHandler;
tor@16862
    65
import org.python.antlr.ParseException;
tor@17137
    66
import org.python.antlr.PythonLexer;
tor@17137
    67
import org.python.antlr.PythonTokenSource;
tor@16862
    68
import org.python.antlr.PythonTree;
tor@17137
    69
import org.python.antlr.PythonTreeAdaptor;
tor@16923
    70
import org.python.antlr.base.expr;
tor@16923
    71
import org.python.antlr.base.mod;
tor@16923
    72
import org.python.antlr.base.slice;
tor@16923
    73
import org.python.antlr.base.stmt;
tor@17137
    74
import org.python.antlr.runtime.ANTLRReaderStream;
tor@17137
    75
import org.python.antlr.runtime.CharStream;
jenselme@18429
    76
import org.python.core.PyException;
tor@16862
    77
tor@16862
    78
/**
tor@16862
    79
 * Parser for Python. Wraps Jython.
tor@16862
    80
 * 
tor@16862
    81
 * @author Frank Wierzbicki
tor@16862
    82
 * @author Tor Norbye
tor@16862
    83
 */
juniel_katarn@18215
    84
public class PythonParser extends Parser {
tor@16862
    85
    /** For unit tests such that they can make sure we didn't have a parser abort */
tor@16862
    86
    static Throwable runtimeException;
tor@16862
    87
tor@16893
    88
    static {
tor@16893
    89
        org.python.core.PySystemState.initialize();
tor@16893
    90
    }
juniel_katarn@18215
    91
    
juniel_katarn@18215
    92
    private Result lastResult;
vincentvdl@18286
    93
    private final PythonFileEncodingQuery fileEncodingQuery = new PythonFileEncodingQuery();
vincentvdl@18286
    94
    private String headerCached = null;
vincentvdl@18286
    95
    private String encodingCache = null;
tor@16893
    96
tor@17137
    97
    public mod file_input(CharStream charStream, String fileName) throws RecognitionException {
tor@17137
    98
        ListErrorHandler eh = new ListErrorHandler();
tor@17137
    99
        mod tree = null;
jenselme@18273
   100
        PythonLexer lexer = new PythonLexer(charStream);
tor@17137
   101
        lexer.setErrorHandler(eh);
tor@17137
   102
        CommonTokenStream tokens = new CommonTokenStream(lexer);
tor@17137
   103
        tokens.discardOffChannelTokens(true);
tor@17137
   104
        PythonTokenSource indentedSource = new PythonTokenSource(tokens, fileName);
tor@17137
   105
        tokens = new CommonTokenStream(indentedSource);
tor@17137
   106
        org.python.antlr.PythonParser parser = new org.python.antlr.PythonParser(tokens);
tor@17137
   107
        parser.setTreeAdaptor(new PythonTreeAdaptor());
tor@17137
   108
        parser.setErrorHandler(eh);
tor@17137
   109
        org.python.antlr.PythonParser.file_input_return r = parser.file_input();
tor@17137
   110
        tree = (mod)r.getTree();
tor@17137
   111
        return tree;
tor@17137
   112
    }
tor@17137
   113
juniel_katarn@18215
   114
    @Override
juniel_katarn@18215
   115
    // Intentionally empty: this parser keeps no listener list, so registration is ignored.
    public void addChangeListener(ChangeListener changeListener) {}
juniel_katarn@18215
   116
juniel_katarn@18215
   117
    @Override
juniel_katarn@18215
   118
    // Intentionally empty: no listeners are ever stored, so there is nothing to remove.
    public void removeChangeListener(ChangeListener changeListener) {}
juniel_katarn@18215
   119
    
tor@17137
   120
    public PythonTree parse(InputStream istream, String fileName) throws Exception {
tor@17137
   121
        InputStreamReader reader = new InputStreamReader(istream, "ISO-8859-1");
tor@17137
   122
        return file_input(new ANTLRReaderStream(reader), fileName);
tor@17137
   123
    }
juniel_katarn@18215
   124
    
juniel_katarn@18215
   125
    @Override
juniel_katarn@18215
   126
    public final Result getResult(Task task) throws org.netbeans.modules.parsing.spi.ParseException {
juniel_katarn@18215
   127
        return lastResult;
juniel_katarn@18215
   128
    }
juniel_katarn@18215
   129
    
juniel_katarn@18215
   130
    private static final Logger LOG = Logger.getLogger(PythonParser.class.getName());
tor@17137
   131
juniel_katarn@18215
   132
    @Override
juniel_katarn@18215
   133
    public void parse(Snapshot snapshot, Task task, SourceModificationEvent event) throws org.netbeans.modules.parsing.spi.ParseException {
juniel_katarn@18215
   134
        Context context = new Context();
juniel_katarn@18215
   135
        context.snapshot = snapshot;
juniel_katarn@18215
   136
        context.event = event;
juniel_katarn@18215
   137
        context.task = task;
juniel_katarn@18215
   138
        context.caretOffset = GsfUtilities.getLastKnownCaretOffset(snapshot, event);
juniel_katarn@18215
   139
        context.source = snapshot.getText().toString();
juniel_katarn@18215
   140
        context.file = snapshot.getSource().getFileObject();
ralphbenjamin@18243
   141
        if(context.file == null) {
ralphbenjamin@18243
   142
            return; // TODO: parse the source, not the file
ralphbenjamin@18243
   143
        }
juniel_katarn@18215
   144
        /* Let's not sanitize ;-) Would be great if we could have a more robust parser
juniel_katarn@18215
   145
        if (context.caretOffset != -1) {
juniel_katarn@18215
   146
            context.sanitized = Sanitize.EDITED_DOT;
juniel_katarn@18215
   147
        }
juniel_katarn@18215
   148
        */
juniel_katarn@18215
   149
        lastResult = parse(context, context.sanitized);
juniel_katarn@18215
   150
    }
juniel_katarn@18215
   151
    public PythonParserResult parse(final Context context, Sanitize sanitizing) {
tor@16862
   152
        boolean sanitizedSource = false;
tor@16862
   153
        String sourceCode = context.source;
tor@16862
   154
        if (!((sanitizing == Sanitize.NONE) || (sanitizing == Sanitize.NEVER))) {
tor@16862
   155
            boolean ok = sanitizeSource(context, sanitizing);
tor@16862
   156
tor@16862
   157
            if (ok) {
tor@16862
   158
                assert context.sanitizedSource != null;
tor@16862
   159
                sanitizedSource = true;
tor@16862
   160
                sourceCode = context.sanitizedSource;
tor@16862
   161
            } else {
tor@16862
   162
                // Try next trick
tor@16862
   163
                return sanitize(context, sanitizing);
tor@16862
   164
            }
tor@16862
   165
        }
tor@16862
   166
        final String source = sourceCode;
tor@16862
   167
tor@16862
   168
        if (sanitizing == Sanitize.NONE) {
tor@16862
   169
            context.errorOffset = -1;
tor@16862
   170
        }
tor@16862
   171
ralphbenjamin@18263
   172
        final List<Error> errors = new ArrayList<>();
juniel_katarn@18215
   173
        final FileObject file = context.file;
tor@16862
   174
        try {
tor@16862
   175
            String fileName = file.getNameExt();
tor@16923
   176
            // TODO - sniff file headers etc. Frank's comment:
tor@16923
   177
            // Longer term for Python compatibility, having NetBeans sniff the top two lines
tor@16923
   178
            // for an encoding would be the right thing to do from a pure Python
tor@16923
   179
            // compatibility standard (see http://www.python.org/dev/peps/pep-0263/) I
tor@16923
   180
            // have pep-0263 code in Jython that I could probably extract for this
tor@16923
   181
            // purpose down the road.
tor@16923
   182
            //String charset = "ISO8859_1"; // NOI18N
tor@16923
   183
            //String charset = "UTF-8"; // NOI18N
tor@16923
   184
            //String charset = "iso8859_1"; // NOI18N
vincentvdl@18286
   185
            // TODO: improve this check.
vincentvdl@18286
   186
            int cache_len = sourceCode.length() >= 64 ? 64 : sourceCode.length();
vincentvdl@18286
   187
            if (headerCached == null || cache_len != headerCached.length() || !headerCached.equals(sourceCode.substring(0, cache_len))) {
vincentvdl@18286
   188
                headerCached = sourceCode.substring(0, cache_len);
vincentvdl@18286
   189
                encodingCache = fileEncodingQuery.getPythonFileEncoding(sourceCode.split("\n", 2));                
vincentvdl@18286
   190
            }
vincentvdl@18286
   191
            String charset = encodingCache;            
vincentvdl@18286
   192
                
tor@16862
   193
            final boolean ignoreErrors = sanitizedSource;
tor@16862
   194
            ListErrorHandler errorHandler = new ListErrorHandler() {
tor@16862
   195
                @Override
tor@16862
   196
                public void error(String message, PythonTree t) {
juniel_katarn@18215
   197
                    errors.add(new DefaultError(null, message, null, file, t.getCharStartIndex(), t.getCharStopIndex(), Severity.ERROR));
tor@16862
   198
                    super.error(message, t);
tor@16862
   199
                }
tor@16862
   200
tor@16862
   201
                @Override
tor@16923
   202
                public expr errorExpr(PythonTree t) {
tor@16862
   203
                    return super.errorExpr(t);
tor@16862
   204
                }
tor@16862
   205
tor@16862
   206
                @Override
tor@16923
   207
                public mod errorMod(PythonTree t) {
tor@16862
   208
                    return super.errorMod(t);
tor@16862
   209
                }
tor@16862
   210
tor@16862
   211
                @Override
tor@16923
   212
                public slice errorSlice(PythonTree t) {
tor@16862
   213
                    return super.errorSlice(t);
tor@16862
   214
                }
tor@16862
   215
tor@16862
   216
                @Override
tor@16923
   217
                public stmt errorStmt(PythonTree t) {
tor@16862
   218
                    return super.errorStmt(t);
tor@16862
   219
                }
tor@16862
   220
tor@16862
   221
                @Override
tor@16862
   222
                public boolean mismatch(BaseRecognizer br, IntStream input, int ttype, BitSet follow) {
tor@16862
   223
                    return super.mismatch(br, input, ttype, follow);
tor@16862
   224
                }
tor@16862
   225
tor@16862
   226
                @Override
tor@16862
   227
                public Object recoverFromMismatchedToken(BaseRecognizer br, IntStream input, int ttype, BitSet follow) {
tor@17173
   228
                    MismatchedTokenException mt = new MismatchedTokenException(ttype, input);
tor@17173
   229
                    String message = br.getErrorMessage(mt, br.getTokenNames());
tor@17173
   230
                    if (mt.line >= 1) {
tor@17173
   231
                        int lineOffset = findLineOffset(context.source, mt.line-1);
tor@17173
   232
                        if (mt.charPositionInLine > 0) {
tor@17173
   233
                            lineOffset += mt.charPositionInLine;
tor@17173
   234
                        }
tor@17173
   235
                        int start = lineOffset;//t.getCharStartIndex();
tor@17173
   236
                        int stop = lineOffset;//t.getCharStopIndex();
juniel_katarn@18215
   237
                        errors.add(new DefaultError(null, message, null, file, start, stop, Severity.ERROR));
tor@17173
   238
                    }
tor@16862
   239
                    return super.recoverFromMismatchedToken(br, input, ttype, follow);
tor@16862
   240
                }
tor@16862
   241
tor@16862
   242
                @Override
tor@16862
   243
                public void recover(Lexer lex, RecognitionException re) {
tor@16862
   244
                    super.recover(lex, re);
tor@16862
   245
                }
tor@16862
   246
tor@16862
   247
                @Override
tor@16862
   248
                public void recover(BaseRecognizer br, IntStream input, RecognitionException re) {
tor@16862
   249
                    super.recover(br, input, re);
tor@16862
   250
                }
tor@16862
   251
tor@16862
   252
                @Override
tor@16862
   253
                public void reportError(BaseRecognizer br, RecognitionException re) {
tor@16862
   254
                    if (!ignoreErrors) {
tor@16862
   255
                        String message = br.getErrorMessage(re, br.getTokenNames());
tor@16862
   256
                        if (message == null || message.length() == 0) {
tor@16862
   257
                            message = re.getMessage();
tor@16862
   258
                        }
tor@16862
   259
                        if (message == null) {
tor@16862
   260
                            //message = re.getUnexpectedType();
tor@16862
   261
                            message = re.toString();
tor@16862
   262
                        }
tor@16862
   263
                        int start = re.index;
tor@16862
   264
tor@16862
   265
                        // Try to find the line offset. re.index doesn't do the trick.
tor@16862
   266
                        start = PythonUtils.getOffsetByLineCol(source, re.line - 1, 0); // -1: 0-based
tor@16862
   267
                        int end = start;
tor@16862
   268
                        if (re.charPositionInLine > 0) {
tor@16862
   269
                            try {
tor@16862
   270
                                end = GsfUtilities.getRowLastNonWhite(source, start) + 1;
tor@16862
   271
                                start += re.charPositionInLine;
tor@16862
   272
                                if (end < start) {
tor@16862
   273
                                    end = start;
tor@16862
   274
                                }
tor@16862
   275
                            } catch (BadLocationException ex) {
tor@16862
   276
                                Exceptions.printStackTrace(ex);
tor@16862
   277
                                end = start;
tor@16862
   278
                            }
tor@16862
   279
                            if (end == 0) {
tor@16862
   280
                                end = start;
tor@16862
   281
                            }
tor@16862
   282
                        }
tor@16862
   283
tor@16862
   284
                        // Some errors have better offsets if we look at the token stream
tor@16862
   285
                        if (re instanceof MismatchedTokenException) {
tor@16862
   286
                            MismatchedTokenException m = (MismatchedTokenException)re;
tor@16862
   287
                            if (m.token != null) {
tor@17030
   288
                                if (m.token instanceof org.python.antlr.runtime.CommonToken) {
tor@17030
   289
                                    CommonToken token = (org.python.antlr.runtime.CommonToken)m.token;
tor@16862
   290
                                    start = token.getStartIndex();
tor@16862
   291
                                    end = token.getStopIndex();
tor@16862
   292
                                }
tor@16862
   293
                            }
tor@16862
   294
                        }
tor@16862
   295
tor@16862
   296
                        if (start > source.length()) {
tor@16862
   297
                            start = source.length();
tor@16862
   298
                            end = start;
tor@16862
   299
                        }
tor@16862
   300
juniel_katarn@18215
   301
                        errors.add(new DefaultError(null, message, null, file, start, end, Severity.ERROR));
tor@16862
   302
jenselme@18295
   303
                        // In order to avoid a StackOverflowError, the BaseRecognizer must be recreated.
jenselme@18295
   304
                        // We must keep the names of the tokens to avoid a NullPointerException.
jenselme@18295
   305
                        // See bz252630
jenselme@18295
   306
                        final String[] tokenNames = br.getTokenNames();
jenselme@18273
   307
                        br = new BaseRecognizer() {
jenselme@18273
   308
jenselme@18273
   309
                            @Override
jenselme@18273
   310
                            public String getSourceName() {
jenselme@18273
   311
                                return file.getName();
jenselme@18273
   312
                            }
jenselme@18295
   313
jenselme@18295
   314
                            @Override
jenselme@18295
   315
                            public String[] getTokenNames() {
jenselme@18295
   316
                                return tokenNames;
jenselme@18295
   317
                            }
jenselme@18273
   318
                        };
jenselme@18295
   319
jenselme@18295
   320
                        super.reportError(br, re);
tor@16862
   321
                    }
tor@16862
   322
                }
tor@16862
   323
            };
tor@17137
   324
jenselme@18273
   325
            PythonLexer lexer = new PythonLexer(new ANTLRStringStream(sourceCode));
tor@17137
   326
            lexer.setErrorHandler(errorHandler);
tor@17137
   327
            CommonTokenStream tokens = new CommonTokenStream(lexer);
tor@17137
   328
            tokens.discardOffChannelTokens(true);
tor@17137
   329
            PythonTokenSource indentedSource = new PythonTokenSource(tokens, fileName);
juniel_katarn@18215
   330
            CommonTokenStream indentedTokens = new CommonTokenStream(indentedSource);
jenselme@18298
   331
            // Import line ending with a dot raise a NullPointerException in
jenselme@18298
   332
            // org.python.antlr.GrammarActions.makeDottedText called from parser.file_input
jenselme@18298
   333
            // sanitizeImportTokens will remove the dot token from the list of tokens in
jenselme@18298
   334
            // indentedTokens to avoid the bug and add an error at this file.
jenselme@18298
   335
            // See https://netbeans.org/bugzilla/show_bug.cgi?id=252356
jenselme@18298
   336
            sanitizeImportTokens(indentedTokens, errors, file);
vincentvdl@18286
   337
            org.python.antlr.PythonParser parser;
vincentvdl@18286
   338
            if (charset != null) {
vincentvdl@18286
   339
                parser = new org.python.antlr.PythonParser(indentedTokens, charset);
vincentvdl@18286
   340
            } else {
vincentvdl@18286
   341
                parser = new org.python.antlr.PythonParser(indentedTokens);
vincentvdl@18286
   342
            }
tor@17137
   343
            parser.setTreeAdaptor(new PythonTreeAdaptor());
tor@17137
   344
            parser.setErrorHandler(errorHandler);
tor@17137
   345
            org.python.antlr.PythonParser.file_input_return r = parser.file_input();
tor@17137
   346
            PythonTree t = (PythonTree)r.getTree();
juniel_katarn@18215
   347
            PythonParserResult result = new PythonParserResult(t, context.snapshot);
juniel_katarn@18215
   348
            result.setErrors(errors);
tor@16862
   349
tor@16862
   350
            result.setSanitized(context.sanitized, context.sanitizedRange, context.sanitizedContents);
tor@16862
   351
            result.setSource(sourceCode);
tor@16862
   352
tor@16862
   353
            return result;
tor@16862
   354
        } catch (ParseException pe) {
tor@16862
   355
            if (sanitizing == Sanitize.NONE) {
tor@16862
   356
                PythonParserResult sanitizedResult = sanitize(context, sanitizing);
tor@16862
   357
                if (sanitizedResult.isValid()) {
tor@16862
   358
                    return sanitizedResult;
tor@16862
   359
                } else {
tor@16862
   360
                    int offset = pe.index;
tor@16862
   361
                    assert offset >= 0;
tor@16862
   362
                    String desc = pe.getLocalizedMessage();
tor@16862
   363
                    if (desc == null) {
tor@16862
   364
                        desc = pe.getMessage();
tor@16862
   365
                    }
juniel_katarn@18215
   366
                    DefaultError error = new DefaultError(null /*key*/, desc, null, file, offset, offset, Severity.ERROR);
juniel_katarn@18215
   367
                    PythonParserResult parserResult = new PythonParserResult(null, context.snapshot);
tor@16862
   368
                    parserResult.addError(error);
tor@16862
   369
                    for (Error e : errors) {
tor@16862
   370
                        parserResult.addError(e);
tor@16862
   371
                    }
tor@16862
   372
tor@16862
   373
                    return parserResult;
tor@16862
   374
                }
tor@16862
   375
            } else {
tor@16862
   376
                return sanitize(context, sanitizing);
tor@16862
   377
            }
jenselme@18429
   378
jenselme@18429
   379
        } catch (PyException e) {
jenselme@18429
   380
            // This is issue 251705
jenselme@18429
   381
            Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
jenselme@18429
   382
            return new PythonParserResult(null, context.snapshot);
jenselme@18392
   383
        } catch (IllegalArgumentException e) {
jenselme@18392
   384
            Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
jenselme@18392
   385
            return new PythonParserResult(null, context.snapshot);
tor@16862
   386
        } catch (NullPointerException e) {
tor@16862
   387
            String fileName = "";
juniel_katarn@18215
   388
            if (file != null) {
juniel_katarn@18215
   389
                fileName = FileUtil.getFileDisplayName(file);
tor@16862
   390
            }
juniel_katarn@18215
   391
            e = Exceptions.attachMessage(e, "Was parsing " + fileName);
jenselme@18379
   392
            Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
juniel_katarn@18215
   393
            return new PythonParserResult(null, context.snapshot);
tor@16862
   394
        } catch (Throwable t) {
tor@16862
   395
            runtimeException = t;
tor@16862
   396
            StackTraceElement[] stackTrace = t.getStackTrace();
juniel_katarn@18221
   397
            if (stackTrace != null && stackTrace.length > 0 && stackTrace[0].getClassName().startsWith("org.python.antlr")) {//.runtime.tree.RewriteRuleElementStream")) {
tor@16862
   398
                // This is issue 150921
tor@16862
   399
                // Don't bug user about it -- we already know
tor@16862
   400
                Logger.getLogger(this.getClass().getName()).log(Level.FINE, "Encountered issue #150921", t);
tor@16862
   401
            } else {
juniel_katarn@18215
   402
                t = Exceptions.attachMessage(t, "Was parsing " + FileUtil.getFileDisplayName(file));
tor@16862
   403
                Exceptions.printStackTrace(t);
tor@16862
   404
            }
juniel_katarn@18215
   405
            return new PythonParserResult(null, context.snapshot);
tor@16862
   406
        }
tor@16862
   407
    }
tor@16862
   408
jenselme@18298
   409
    private void sanitizeImportTokens(CommonTokenStream indentedTokens, List errors, FileObject file) {
jenselme@18298
   410
        List tokens = indentedTokens.getTokens();
jenselme@18298
   411
        List<CommonToken> tokensToRemove = new ArrayList<>();
jenselme@18298
   412
        int i = 0;
jenselme@18298
   413
        while (i < tokens.size()) {
jenselme@18298
   414
            CommonToken importToken = (CommonToken)tokens.get(i);
jenselme@18298
   415
            if ("import".equals(importToken.getText()) || "from".equals(importToken.getText())) {
jenselme@18298
   416
                // sanitizeDotTokens return the index of the token that starts the next line
jenselme@18298
   417
                i = sanitizeDotTokens(tokens, tokensToRemove, importToken, i + 1, errors, file);
jenselme@18298
   418
            } else {
jenselme@18298
   419
                i++;
jenselme@18298
   420
            }
jenselme@18298
   421
        }
jenselme@18298
   422
jenselme@18298
   423
        for (CommonToken token : tokensToRemove) {
jenselme@18298
   424
            tokens.remove(token);
jenselme@18298
   425
        }
jenselme@18298
   426
    }
jenselme@18298
   427
jenselme@18298
   428
    private int sanitizeDotTokens(List tokens, List tokensToRemove, CommonToken importToken,
jenselme@18298
   429
            int startIndex, List errors, FileObject file) {
jenselme@18298
   430
        for (int j = startIndex; j < tokens.size() - 1; j++) {
jenselme@18298
   431
            CommonToken dotToken = (CommonToken)tokens.get(j);
jenselme@18298
   432
            CommonToken nextToken = (CommonToken)tokens.get(j + 1);
jenselme@18298
   433
            if (".".equals(dotToken.getText())) {
jenselme@18298
   434
                if (nextToken.getText().startsWith("\n")) {
jenselme@18298
   435
                    tokensToRemove.add(dotToken);
jenselme@18298
   436
                    String rawTokenText;
jenselme@18298
   437
                    if (nextToken.getText().startsWith("\n")) {
jenselme@18298
   438
                        rawTokenText = "\\n";
jenselme@18298
   439
                    } else {
jenselme@18298
   440
                        rawTokenText = " ";
jenselme@18298
   441
                    }
jenselme@18298
   442
                    errors.add(
jenselme@18298
   443
                        new DefaultError(null, "Mismatch input '.' expecting NAME\nMissing NAME at '" + rawTokenText + "'",
jenselme@18298
   444
                            null, file, importToken.getStartIndex(), dotToken.getStopIndex(), Severity.ERROR));
jenselme@18298
   445
                }
jenselme@18298
   446
            } else if ("\n".equals(nextToken.getText())) { // End of line, must continue looping from external loop
jenselme@18298
   447
                return j + 1;
jenselme@18298
   448
            }
jenselme@18298
   449
        }
jenselme@18298
   450
jenselme@18298
   451
        return startIndex;
jenselme@18298
   452
    }
jenselme@18298
   453
tor@16862
   454
    private static String asString(CharSequence sequence) {
tor@16862
   455
        if (sequence instanceof String) {
tor@16862
   456
            return (String)sequence;
tor@16862
   457
        } else {
tor@16862
   458
            return sequence.toString();
tor@16862
   459
        }
tor@16862
   460
    }
tor@16862
   461
tor@16862
   462
tor@16862
   463
    @SuppressWarnings("fallthrough")
juniel_katarn@18215
   464
    private PythonParserResult sanitize(final Context context, final Sanitize sanitizing) {
tor@16862
   465
tor@16862
   466
        switch (sanitizing) {
tor@16862
   467
        case NEVER:
juniel_katarn@18215
   468
            return new PythonParserResult(null, context.snapshot);
tor@16862
   469
tor@17208
   470
        case NONE:
tor@17208
   471
            if (context.caretOffset != -1) {
tor@17208
   472
                return parse(context, Sanitize.EDITED_DOT);
tor@17208
   473
            }
tor@17208
   474
tor@16862
   475
        case EDITED_DOT:
tor@16862
   476
            // We've tried removing whitespace around the edit location
tor@17208
   477
            // Fall through to try parsing with removing stuff around error location
tor@16862
   478
            // (Don't bother doing this if errorOffset==caretOffset since that would try the same
tor@16862
   479
            // source as EDITED_DOT which has no better chance of succeeding...)
tor@16862
   480
            if (context.errorOffset != -1 && context.errorOffset != context.caretOffset) {
tor@16862
   481
                return parse(context, Sanitize.ERROR_DOT);
tor@16862
   482
            }
tor@16862
   483
tor@16862
   484
        // Fall through to try the next trick
tor@16862
   485
        case ERROR_DOT:
tor@16862
   486
tor@16862
   487
            // We've tried removing dots - now try removing the whole line at the error position
tor@16862
   488
            if (context.errorOffset != -1) {
tor@16862
   489
                return parse(context, Sanitize.ERROR_LINE);
tor@16862
   490
            }
tor@16862
   491
tor@16862
   492
        // Fall through to try the next trick
tor@16862
   493
        case ERROR_LINE:
tor@16862
   494
tor@16862
   495
            // Messing with the error line didn't work - we could try "around" the error line
tor@16862
   496
            // but I'm not attempting that now.
tor@16862
   497
            // Finally try removing the whole line around the user editing position
tor@16862
   498
            // (which could be far from where the error is showing up - but if you're typing
tor@16862
   499
            // say a new "def" statement in a class, this will show up as an error on a mismatched
tor@16862
   500
            // "end" statement rather than here
tor@16862
   501
            if (context.caretOffset != -1) {
tor@16862
   502
                return parse(context, Sanitize.EDITED_LINE);
tor@16862
   503
            }
tor@16862
   504
tor@16862
   505
        // Fall through for default handling
tor@16862
   506
        case EDITED_LINE:
tor@16862
   507
        default:
tor@16862
   508
            // We're out of tricks - just return the failed parse result
juniel_katarn@18215
   509
            return new PythonParserResult(null, context.snapshot);
tor@16862
   510
        }
tor@16862
   511
    }
tor@16862
   512
tor@16862
   513
    /**
tor@16862
   514
     * Try cleaning up the source buffer around the current offset to increase
tor@16862
   515
     * likelihood of parse success. Initially this method had a lot of
tor@16862
   516
     * logic to determine whether a parse was likely to fail (e.g. invoking
tor@16862
   517
     * the isEndMissing method from bracket completion etc.).
tor@16862
   518
     * However, I am now trying a parse with the real source first, and then
tor@16862
   519
     * only if that fails do I try parsing with sanitized source. Therefore,
tor@16862
   520
     * this method has to be less conservative in ripping out code since it
tor@16862
   521
     * will only be used when the regular source is failing.
tor@16862
   522
     *
tor@16862
   523
     * @todo Automatically close current statement by inserting ";"
tor@16862
   524
     * @todo Handle sanitizing "new ^" from parse errors
tor@16862
   525
     * @todo Replace "end" insertion fix with "}" insertion
tor@16862
   526
     */
tor@16862
   527
    private boolean sanitizeSource(Context context, Sanitize sanitizing) {
tor@16862
   528
        int offset = context.caretOffset;
tor@16862
   529
tor@16862
   530
        // Let caretOffset represent the offset of the portion of the buffer we'll be operating on
tor@16862
   531
        if ((sanitizing == Sanitize.ERROR_DOT) || (sanitizing == Sanitize.ERROR_LINE)) {
tor@16862
   532
            offset = context.errorOffset;
tor@16862
   533
        }
tor@16862
   534
tor@16862
   535
        // Don't attempt cleaning up the source if we don't have the buffer position we need
tor@16862
   536
        if (offset == -1) {
tor@16862
   537
            return false;
tor@16862
   538
        }
tor@16862
   539
tor@16862
   540
        // The user might be editing around the given caretOffset.
tor@16862
   541
        // See if it looks modified
tor@16862
   542
        // Insert an end statement? Insert a } marker?
tor@16862
   543
        String doc = context.source;
tor@16862
   544
        if (offset > doc.length()) {
tor@16862
   545
            return false;
tor@16862
   546
        }
tor@16862
   547
tor@16862
   548
        try {
tor@16862
   549
            // Sometimes the offset shows up on the next line
tor@16862
   550
            if (GsfUtilities.isRowEmpty(doc, offset) || GsfUtilities.isRowWhite(doc, offset)) {
tor@16862
   551
                offset = GsfUtilities.getRowStart(doc, offset) - 1;
tor@16862
   552
                if (offset < 0) {
tor@16862
   553
                    offset = 0;
tor@16862
   554
                }
tor@16862
   555
            }
tor@16862
   556
tor@16862
   557
            if (!(GsfUtilities.isRowEmpty(doc, offset) || GsfUtilities.isRowWhite(doc, offset))) {
tor@16862
   558
                if ((sanitizing == Sanitize.EDITED_LINE) || (sanitizing == Sanitize.ERROR_LINE)) {
tor@16862
   559
                    // See if I should try to remove the current line, since it has text on it.
tor@16862
   560
                    int lineEnd = GsfUtilities.getRowLastNonWhite(doc, offset);
tor@16862
   561
tor@16862
   562
                    if (lineEnd != -1) {
tor@16862
   563
                        lineEnd++; // lineEnd is exclusive, not inclusive
tor@16862
   564
                        StringBuilder sb = new StringBuilder(doc.length());
tor@16862
   565
                        int lineStart = GsfUtilities.getRowStart(doc, offset);
tor@16862
   566
                        if (lineEnd >= lineStart + 2) {
tor@16862
   567
                            sb.append(doc.substring(0, lineStart));
tor@16862
   568
                            sb.append("//");
tor@16862
   569
                            int rest = lineStart + 2;
tor@16862
   570
                            if (rest < doc.length()) {
tor@16862
   571
                                sb.append(doc.substring(rest, doc.length()));
tor@16862
   572
                            }
tor@16862
   573
                        } else {
tor@16862
   574
                            // A line with just one character - can't replace with a comment
tor@16862
   575
                            // Just replace the char with a space
tor@16862
   576
                            sb.append(doc.substring(0, lineStart));
tor@16862
   577
                            sb.append(" ");
tor@16862
   578
                            int rest = lineStart + 1;
tor@16862
   579
                            if (rest < doc.length()) {
tor@16862
   580
                                sb.append(doc.substring(rest, doc.length()));
tor@16862
   581
                            }
tor@16862
   582
tor@16862
   583
                        }
tor@16862
   584
tor@16862
   585
                        assert sb.length() == doc.length();
tor@16862
   586
tor@16862
   587
                        context.sanitizedRange = new OffsetRange(lineStart, lineEnd);
tor@16862
   588
                        context.sanitizedSource = sb.toString();
tor@16862
   589
                        context.sanitizedContents = doc.substring(lineStart, lineEnd);
tor@16862
   590
                        return true;
tor@16862
   591
                    }
tor@16862
   592
                } else {
tor@16862
   593
                    assert sanitizing == Sanitize.ERROR_DOT || sanitizing == Sanitize.EDITED_DOT;
tor@16862
   594
                    // Try nuking dots/colons from this line
tor@16862
   595
                    // See if I should try to remove the current line, since it has text on it.
tor@16862
   596
                    int lineStart = GsfUtilities.getRowStart(doc, offset);
tor@16862
   597
                    int lineEnd = offset - 1;
tor@16862
   598
                    while (lineEnd >= lineStart && lineEnd < doc.length()) {
tor@16862
   599
                        if (!Character.isWhitespace(doc.charAt(lineEnd))) {
tor@16862
   600
                            break;
tor@16862
   601
                        }
tor@16862
   602
                        lineEnd--;
tor@16862
   603
                    }
tor@16862
   604
                    if (lineEnd > lineStart) {
tor@16862
   605
                        StringBuilder sb = new StringBuilder(doc.length());
tor@16862
   606
                        String line = doc.substring(lineStart, lineEnd + 1);
tor@16862
   607
                        int removeChars = 0;
tor@16862
   608
                        int removeEnd = lineEnd + 1;
tor@16862
   609
                        boolean isLineEnd = GsfUtilities.getRowLastNonWhite(context.source, lineEnd) <= lineEnd;
tor@16862
   610
tor@16862
   611
                        if (line.endsWith(".")) { // NOI18N
tor@16862
   612
                            removeChars = 1;
tor@16862
   613
                        } else if (line.endsWith("(")) { // NOI18N
tor@16862
   614
                            if (isLineEnd) {
tor@16862
   615
                                removeChars = 1;
tor@16862
   616
                            }
tor@16862
   617
                        } else if (line.endsWith(",")) { // NOI18N                            removeChars = 1;
tor@16862
   618
                            if (!isLineEnd) {
tor@16862
   619
                                removeChars = 1;
tor@16862
   620
                            }
tor@16862
   621
                        } else if (line.endsWith(", ")) { // NOI18N
tor@16862
   622
                            if (!isLineEnd) {
tor@16862
   623
                                removeChars = 2;
tor@16862
   624
                            }
tor@16862
   625
                        } else if (line.endsWith(",)")) { // NOI18N
tor@16862
   626
                            // Handle lone comma in parameter list - e.g.
tor@16862
   627
                            // type "foo(a," -> you end up with "foo(a,|)" which doesn't parse - but
tor@16862
   628
                            // the line ends with ")", not "," !
tor@16862
   629
                            // Just remove the comma
tor@16862
   630
                            removeChars = 1;
tor@16862
   631
                            removeEnd--;
tor@16862
   632
                        } else if (line.endsWith(", )")) { // NOI18N
tor@16862
   633
                            // Just remove the comma
tor@16862
   634
                            removeChars = 1;
tor@16862
   635
                            removeEnd -= 2;
tor@16862
   636
                        } else if (line.endsWith(" def") && isLineEnd) { // NOI18N
tor@16862
   637
                            removeChars = 3;
tor@16862
   638
                        } else {
tor@16862
   639
//                            // Make sure the line doesn't end with one of the JavaScript keywords
tor@16862
   640
//                            // (new, do, etc) - we can't handle that!
tor@16862
   641
//                            for (String keyword : PythonUtils.PYTHON_KEYWORDS) { // reserved words are okay
tor@16862
   642
//                                if (line.endsWith(keyword)) {
tor@16862
   643
//                                    if ("print".equals(keyword)) { // NOI18N
tor@16862
   644
//                                        // Only remove the keyword if it's the end of the line. Otherwise,
tor@16862
   645
//                                        // it could have just been typed in front of something (e.g. inserted a print) and we don't
tor@16862
   646
//                                        // want to confuse the parser with "va foo" instead of "var foo"
tor@16862
   647
//                                        if (!isLineEnd) {
tor@16862
   648
//                                            continue;
tor@16862
   649
//                                        }
tor@16862
   650
//                                    }
tor@16862
   651
//                                    removeChars = 1;
tor@16862
   652
//                                    break;
tor@16862
   653
//                                }
tor@16862
   654
//                            }
tor@16862
   655
                        }
tor@16862
   656
tor@16862
   657
                        if (removeChars == 0) {
tor@16862
   658
                            return false;
tor@16862
   659
                        }
tor@16862
   660
tor@16862
   661
                        int removeStart = removeEnd - removeChars;
tor@16862
   662
tor@16862
   663
                        sb.append(doc.substring(0, removeStart));
tor@16862
   664
tor@16862
   665
                        for (int i = 0; i < removeChars; i++) {
tor@16862
   666
                            sb.append(' ');
tor@16862
   667
                        }
tor@16862
   668
tor@16862
   669
                        if (removeEnd < doc.length()) {
tor@16862
   670
                            sb.append(doc.substring(removeEnd, doc.length()));
tor@16862
   671
                        }
tor@16862
   672
                        assert sb.length() == doc.length();
tor@16862
   673
tor@16862
   674
                        context.sanitizedRange = new OffsetRange(removeStart, removeEnd);
tor@16862
   675
                        context.sanitizedSource = sb.toString();
tor@16862
   676
                        context.sanitizedContents = doc.substring(removeStart, removeEnd);
tor@16862
   677
                        return true;
tor@16862
   678
                    }
tor@16862
   679
                }
tor@16862
   680
            }
tor@16862
   681
        } catch (BadLocationException ble) {
tor@16862
   682
            Exceptions.printStackTrace(ble);
tor@16862
   683
        }
tor@16862
   684
tor@16862
   685
        return false;
tor@16862
   686
    }
tor@16862
   687
tor@17173
   688
    private static int findLineOffset(String source, int line) {
tor@17173
   689
        int offset = -1;
tor@17173
   690
        for (int i = 0; i < line; i++) {
tor@17173
   691
            offset = source.indexOf("\n", offset+1);
tor@17173
   692
            if (offset == -1) {
tor@17173
   693
                return source.length();
tor@17173
   694
            }
tor@17173
   695
        }
tor@17173
   696
tor@17173
   697
        return Math.min(source.length(), offset+1);
tor@17173
   698
    }
tor@17173
   699
tor@16862
   700
    /** Attempts to sanitize the input buffer */
tor@16862
   701
    public static enum Sanitize {
tor@16862
   702
        /** Only parse the current file accurately, don't try heuristics */
tor@16862
   703
        NEVER,
tor@16862
   704
        /** Perform no sanitization */
tor@16862
   705
        NONE,
tor@16862
   706
        /** Try to remove the trailing . or :: at the caret line */
tor@16862
   707
        EDITED_DOT,
tor@16862
   708
        /** Try to remove the trailing . or :: at the error position, or the prior
tor@16862
   709
         * line, or the caret line */
tor@16862
   710
        ERROR_DOT,
tor@16862
   711
        /** Try to cut out the error line */
tor@16862
   712
        ERROR_LINE,
tor@16862
   713
        /** Try to cut out the current edited line, if known */
tor@16862
   714
        EDITED_LINE,
tor@16862
   715
    }
tor@16862
   716
juniel_katarn@18215
   717
    /** Sanitize context */
tor@16862
   718
    public static class Context {
juniel_katarn@18215
   719
        private FileObject file;
juniel_katarn@18215
   720
//        private ParseListener listener;
tor@16862
   721
        private int errorOffset;
tor@16862
   722
        private String source;
tor@16862
   723
        private String sanitizedSource;
tor@16862
   724
        private OffsetRange sanitizedRange = OffsetRange.NONE;
tor@16862
   725
        private String sanitizedContents;
tor@16862
   726
        private int caretOffset;
tor@16862
   727
        private Sanitize sanitized = Sanitize.NONE;
juniel_katarn@18215
   728
//        private TranslatedSource translatedSource;
juniel_katarn@18215
   729
//        private Parser.Job job;
juniel_katarn@18215
   730
        private Snapshot snapshot;
juniel_katarn@18215
   731
        private Task task;
juniel_katarn@18215
   732
        private SourceModificationEvent event;
juniel_katarn@18215
   733
//
juniel_katarn@18215
   734
//        public Context(ParserFile parserFile, ParseListener listener, String source, int caretOffset, TranslatedSource translatedSource, Parser.Job job) {
juniel_katarn@18215
   735
//            this.file = parserFile;
juniel_katarn@18215
   736
//            this.listener = listener;
juniel_katarn@18215
   737
//            this.source = source;
juniel_katarn@18215
   738
//            this.caretOffset = caretOffset;
juniel_katarn@18215
   739
//            this.translatedSource = translatedSource;
juniel_katarn@18215
   740
//            this.job = job;
juniel_katarn@18215
   741
//
juniel_katarn@18215
   742
//
juniel_katarn@18215
   743
//            if (caretOffset != -1) {
juniel_katarn@18215
   744
//                sanitized = Sanitize.EDITED_DOT;
juniel_katarn@18215
   745
//            }
juniel_katarn@18215
   746
//        }
juniel_katarn@18215
   747
//
juniel_katarn@18215
   748
//        @Override
juniel_katarn@18215
   749
//        public String toString() {
juniel_katarn@18215
   750
//            return "PythonParser.Context(" + file.toString() + ")"; // NOI18N
juniel_katarn@18215
   751
//        }
juniel_katarn@18215
   752
//
juniel_katarn@18215
   753
//        public OffsetRange getSanitizedRange() {
juniel_katarn@18215
   754
//            return sanitizedRange;
juniel_katarn@18215
   755
//        }
juniel_katarn@18215
   756
//
juniel_katarn@18215
   757
//        public Sanitize getSanitized() {
juniel_katarn@18215
   758
//            return sanitized;
juniel_katarn@18215
   759
//        }
juniel_katarn@18215
   760
//
juniel_katarn@18215
   761
//        public String getSanitizedSource() {
juniel_katarn@18215
   762
//            return sanitizedSource;
juniel_katarn@18215
   763
//        }
juniel_katarn@18215
   764
//
juniel_katarn@18215
   765
//        public int getErrorOffset() {
juniel_katarn@18215
   766
//            return errorOffset;
juniel_katarn@18215
   767
//        }
tor@16862
   768
    }
tor@16862
   769
}