hg/netbeans/contrib: python.source/src/org/netbeans/modules/python/source/PythonParser.java@517409415907 (annotated)

tor@16862	1	/*
tor@16862	2	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
tor@16862	3	*
jglick@17225	4	* Copyright 1997-2010 Oracle and/or its affiliates. All rights reserved.
jglick@17225	5	*
jglick@17225	6	* Oracle and Java are registered trademarks of Oracle and/or its affiliates.
jglick@17225	7	* Other names may be trademarks of their respective owners.
tor@16862	8	*
tor@16862	9	* The contents of this file are subject to the terms of either the GNU
tor@16862	10	* General Public License Version 2 only ("GPL") or the Common
tor@16862	11	* Development and Distribution License("CDDL") (collectively, the
tor@16862	12	* "License"). You may not use this file except in compliance with the
tor@16862	13	* License. You can obtain a copy of the License at
tor@16862	14	* http://www.netbeans.org/cddl-gplv2.html
tor@16862	15	* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
tor@16862	16	* specific language governing permissions and limitations under the
tor@16862	17	* License. When distributing the software, include this License Header
tor@16862	18	* Notice in each file and include the License file at
jglick@17225	19	* nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
tor@16862	20	* particular file as subject to the "Classpath" exception as provided
jglick@17225	21	* by Oracle in the GPL Version 2 section of the License file that
tor@16862	22	* accompanied this code. If applicable, add the following below the
tor@16862	23	* License Header, with the fields enclosed by brackets [] replaced by
tor@16862	24	* your own identifying information:
tor@16862	25	* "Portions Copyrighted [year] [name of copyright owner]"
tor@16862	26	*
tor@16862	27	* Contributor(s):
tor@16862	28	*
tor@16862	29	* Portions Copyrighted 2007 Sun Microsystems, Inc.
tor@16862	30	*/
ralphbenjamin@18313	31	package org.netbeans.modules.python.source;
tor@16862	32
tor@17137	33	import java.io.InputStream;
tor@17137	34	import java.io.InputStreamReader;
tor@16862	35	import java.util.ArrayList;
tor@16862	36	import java.util.List;
tor@16862	37	import java.util.logging.Level;
tor@16862	38	import java.util.logging.Logger;
juniel_katarn@18215	39	import javax.swing.event.ChangeListener;
tor@16862	40	import javax.swing.text.BadLocationException;
juniel_katarn@18215	41	import org.netbeans.modules.csl.api.Severity;
juniel_katarn@18215	42	import org.netbeans.modules.csl.spi.DefaultError;
juniel_katarn@18215	43	import org.netbeans.modules.csl.api.Error;
juniel_katarn@18215	44	import org.netbeans.modules.csl.api.OffsetRange;
juniel_katarn@18215	45	import org.netbeans.modules.csl.spi.GsfUtilities;
juniel_katarn@18215	46	import org.netbeans.modules.parsing.api.Snapshot;
juniel_katarn@18215	47	import org.netbeans.modules.parsing.api.Task;
juniel_katarn@18215	48	import org.netbeans.modules.parsing.spi.Parser;
juniel_katarn@18215	49	import org.netbeans.modules.parsing.spi.SourceModificationEvent;
vincentvdl@18286	50	import org.netbeans.modules.python.api.PythonFileEncodingQuery;
juniel_katarn@18215	51	import org.openide.filesystems.FileObject;
tor@17030	52	import org.python.antlr.runtime.ANTLRStringStream;
tor@17030	53	import org.python.antlr.runtime.BaseRecognizer;
tor@17030	54	import org.python.antlr.runtime.BitSet;
tor@17030	55	import org.python.antlr.runtime.CommonToken;
tor@17137	56	import org.python.antlr.runtime.CommonTokenStream;
tor@17030	57	import org.python.antlr.runtime.IntStream;
tor@17030	58	import org.python.antlr.runtime.Lexer;
tor@17030	59	import org.python.antlr.runtime.MismatchedTokenException;
tor@17030	60	import org.python.antlr.runtime.RecognitionException;
tor@16862	61
tor@16862	62	import org.openide.filesystems.FileUtil;
tor@16862	63	import org.openide.util.Exceptions;
tor@16862	64	import org.python.antlr.ListErrorHandler;
tor@16862	65	import org.python.antlr.ParseException;
tor@17137	66	import org.python.antlr.PythonLexer;
tor@17137	67	import org.python.antlr.PythonTokenSource;
tor@16862	68	import org.python.antlr.PythonTree;
tor@17137	69	import org.python.antlr.PythonTreeAdaptor;
tor@16923	70	import org.python.antlr.base.expr;
tor@16923	71	import org.python.antlr.base.mod;
tor@16923	72	import org.python.antlr.base.slice;
tor@16923	73	import org.python.antlr.base.stmt;
tor@17137	74	import org.python.antlr.runtime.ANTLRReaderStream;
tor@17137	75	import org.python.antlr.runtime.CharStream;
jenselme@18429	76	import org.python.core.PyException;
tor@16862	77
tor@16862	78	/**
tor@16862	79	* Parser for Python. Wraps Jython.
tor@16862	80	*
tor@16862	81	* @author Frank Wierzbicki
tor@16862	82	* @author Tor Norbye
tor@16862	83	*/
juniel_katarn@18215	84	public class PythonParser extends Parser {
/**
 * Last throwable caught by the catch-all handler of {@code parse(Context, Sanitize)}.
 * Exists so that unit tests can verify that no parser abort happened.
 */
static Throwable runtimeException;

// Initialize Jython's system state once, when this class is loaded.
static {
    org.python.core.PySystemState.initialize();
}

/** Result produced by the most recent snapshot parse; handed out by {@code getResult}. */
private Result lastResult;

/** Helper used to determine the declared encoding of a Python source. */
private final PythonFileEncodingQuery fileEncodingQuery = new PythonFileEncodingQuery();

/** Leading part (at most 64 characters) of the source for which {@link #encodingCache} was computed. */
private String headerCached;

/** Encoding computed for {@link #headerCached}; may be {@code null}. */
private String encodingCache;
tor@16893	96
tor@17137	97	public mod file_input(CharStream charStream, String fileName) throws RecognitionException {
tor@17137	98	ListErrorHandler eh = new ListErrorHandler();
tor@17137	99	mod tree = null;
jenselme@18273	100	PythonLexer lexer = new PythonLexer(charStream);
tor@17137	101	lexer.setErrorHandler(eh);
tor@17137	102	CommonTokenStream tokens = new CommonTokenStream(lexer);
tor@17137	103	tokens.discardOffChannelTokens(true);
tor@17137	104	PythonTokenSource indentedSource = new PythonTokenSource(tokens, fileName);
tor@17137	105	tokens = new CommonTokenStream(indentedSource);
tor@17137	106	org.python.antlr.PythonParser parser = new org.python.antlr.PythonParser(tokens);
tor@17137	107	parser.setTreeAdaptor(new PythonTreeAdaptor());
tor@17137	108	parser.setErrorHandler(eh);
tor@17137	109	org.python.antlr.PythonParser.file_input_return r = parser.file_input();
tor@17137	110	tree = (mod)r.getTree();
tor@17137	111	return tree;
tor@17137	112	}
tor@17137	113
juniel_katarn@18215	114	@Override
juniel_katarn@18215	115	public void addChangeListener(ChangeListener changeListener) {}
juniel_katarn@18215	116
juniel_katarn@18215	117	@Override
juniel_katarn@18215	118	public void removeChangeListener(ChangeListener changeListener) {}
juniel_katarn@18215	119
tor@17137	120	public PythonTree parse(InputStream istream, String fileName) throws Exception {
tor@17137	121	InputStreamReader reader = new InputStreamReader(istream, "ISO-8859-1");
tor@17137	122	return file_input(new ANTLRReaderStream(reader), fileName);
tor@17137	123	}
juniel_katarn@18215	124
juniel_katarn@18215	125	@Override
juniel_katarn@18215	126	public final Result getResult(Task task) throws org.netbeans.modules.parsing.spi.ParseException {
juniel_katarn@18215	127	return lastResult;
juniel_katarn@18215	128	}
juniel_katarn@18215	129
juniel_katarn@18215	130	private static final Logger LOG = Logger.getLogger(PythonParser.class.getName());
tor@17137	131
juniel_katarn@18215	132	@Override
juniel_katarn@18215	133	public void parse(Snapshot snapshot, Task task, SourceModificationEvent event) throws org.netbeans.modules.parsing.spi.ParseException {
juniel_katarn@18215	134	Context context = new Context();
juniel_katarn@18215	135	context.snapshot = snapshot;
juniel_katarn@18215	136	context.event = event;
juniel_katarn@18215	137	context.task = task;
juniel_katarn@18215	138	context.caretOffset = GsfUtilities.getLastKnownCaretOffset(snapshot, event);
juniel_katarn@18215	139	context.source = snapshot.getText().toString();
juniel_katarn@18215	140	context.file = snapshot.getSource().getFileObject();
ralphbenjamin@18243	141	if(context.file == null) {
ralphbenjamin@18243	142	return; // TODO: parse the source, not the file
ralphbenjamin@18243	143	}
juniel_katarn@18215	144	/* Let's not sanitize ;-) Would be great if we could have a more robust parser
juniel_katarn@18215	145	if (context.caretOffset != -1) {
juniel_katarn@18215	146	context.sanitized = Sanitize.EDITED_DOT;
juniel_katarn@18215	147	}
juniel_katarn@18215	148	*/
juniel_katarn@18215	149	lastResult = parse(context, context.sanitized);
juniel_katarn@18215	150	}
juniel_katarn@18215	151	public PythonParserResult parse(final Context context, Sanitize sanitizing) {
tor@16862	152	boolean sanitizedSource = false;
tor@16862	153	String sourceCode = context.source;
tor@16862	154	if (!((sanitizing == Sanitize.NONE) \|\| (sanitizing == Sanitize.NEVER))) {
tor@16862	155	boolean ok = sanitizeSource(context, sanitizing);
tor@16862	156
tor@16862	157	if (ok) {
tor@16862	158	assert context.sanitizedSource != null;
tor@16862	159	sanitizedSource = true;
tor@16862	160	sourceCode = context.sanitizedSource;
tor@16862	161	} else {
tor@16862	162	// Try next trick
tor@16862	163	return sanitize(context, sanitizing);
tor@16862	164	}
tor@16862	165	}
tor@16862	166	final String source = sourceCode;
tor@16862	167
tor@16862	168	if (sanitizing == Sanitize.NONE) {
tor@16862	169	context.errorOffset = -1;
tor@16862	170	}
tor@16862	171
ralphbenjamin@18263	172	final List<Error> errors = new ArrayList<>();
juniel_katarn@18215	173	final FileObject file = context.file;
tor@16862	174	try {
tor@16862	175	String fileName = file.getNameExt();
tor@16923	176	// TODO - sniff file headers etc. Frank's comment:
tor@16923	177	// Longer term for Python compatibility, having NetBeans sniff the top two lines
tor@16923	178	// for an encoding would be the right thing to do from a pure Python
tor@16923	179	// compatibility standard (see http://www.python.org/dev/peps/pep-0263/) I
tor@16923	180	// have pep-0263 code in Jython that I could probably extract for this
tor@16923	181	// purpose down the road.
tor@16923	182	//String charset = "ISO8859_1"; // NOI18N
tor@16923	183	//String charset = "UTF-8"; // NOI18N
tor@16923	184	//String charset = "iso8859_1"; // NOI18N
vincentvdl@18286	185	// TODO: improve this check.
vincentvdl@18286	186	int cache_len = sourceCode.length() >= 64 ? 64 : sourceCode.length();
vincentvdl@18286	187	if (headerCached == null \|\| cache_len != headerCached.length() \|\| !headerCached.equals(sourceCode.substring(0, cache_len))) {
vincentvdl@18286	188	headerCached = sourceCode.substring(0, cache_len);
vincentvdl@18286	189	encodingCache = fileEncodingQuery.getPythonFileEncoding(sourceCode.split("\n", 2));
vincentvdl@18286	190	}
vincentvdl@18286	191	String charset = encodingCache;
vincentvdl@18286	192
tor@16862	193	final boolean ignoreErrors = sanitizedSource;
tor@16862	194	ListErrorHandler errorHandler = new ListErrorHandler() {
tor@16862	195	@Override
tor@16862	196	public void error(String message, PythonTree t) {
juniel_katarn@18215	197	errors.add(new DefaultError(null, message, null, file, t.getCharStartIndex(), t.getCharStopIndex(), Severity.ERROR));
tor@16862	198	super.error(message, t);
tor@16862	199	}
tor@16862	200
tor@16862	201	@Override
tor@16923	202	public expr errorExpr(PythonTree t) {
tor@16862	203	return super.errorExpr(t);
tor@16862	204	}
tor@16862	205
tor@16862	206	@Override
tor@16923	207	public mod errorMod(PythonTree t) {
tor@16862	208	return super.errorMod(t);
tor@16862	209	}
tor@16862	210
tor@16862	211	@Override
tor@16923	212	public slice errorSlice(PythonTree t) {
tor@16862	213	return super.errorSlice(t);
tor@16862	214	}
tor@16862	215
tor@16862	216	@Override
tor@16923	217	public stmt errorStmt(PythonTree t) {
tor@16862	218	return super.errorStmt(t);
tor@16862	219	}
tor@16862	220
tor@16862	221	@Override
tor@16862	222	public boolean mismatch(BaseRecognizer br, IntStream input, int ttype, BitSet follow) {
tor@16862	223	return super.mismatch(br, input, ttype, follow);
tor@16862	224	}
tor@16862	225
tor@16862	226	@Override
tor@16862	227	public Object recoverFromMismatchedToken(BaseRecognizer br, IntStream input, int ttype, BitSet follow) {
tor@17173	228	MismatchedTokenException mt = new MismatchedTokenException(ttype, input);
tor@17173	229	String message = br.getErrorMessage(mt, br.getTokenNames());
tor@17173	230	if (mt.line >= 1) {
tor@17173	231	int lineOffset = findLineOffset(context.source, mt.line-1);
tor@17173	232	if (mt.charPositionInLine > 0) {
tor@17173	233	lineOffset += mt.charPositionInLine;
tor@17173	234	}
tor@17173	235	int start = lineOffset;//t.getCharStartIndex();
tor@17173	236	int stop = lineOffset;//t.getCharStopIndex();
juniel_katarn@18215	237	errors.add(new DefaultError(null, message, null, file, start, stop, Severity.ERROR));
tor@17173	238	}
tor@16862	239	return super.recoverFromMismatchedToken(br, input, ttype, follow);
tor@16862	240	}
tor@16862	241
tor@16862	242	@Override
tor@16862	243	public void recover(Lexer lex, RecognitionException re) {
tor@16862	244	super.recover(lex, re);
tor@16862	245	}
tor@16862	246
tor@16862	247	@Override
tor@16862	248	public void recover(BaseRecognizer br, IntStream input, RecognitionException re) {
tor@16862	249	super.recover(br, input, re);
tor@16862	250	}
tor@16862	251
tor@16862	252	@Override
tor@16862	253	public void reportError(BaseRecognizer br, RecognitionException re) {
tor@16862	254	if (!ignoreErrors) {
tor@16862	255	String message = br.getErrorMessage(re, br.getTokenNames());
tor@16862	256	if (message == null \|\| message.length() == 0) {
tor@16862	257	message = re.getMessage();
tor@16862	258	}
tor@16862	259	if (message == null) {
tor@16862	260	//message = re.getUnexpectedType();
tor@16862	261	message = re.toString();
tor@16862	262	}
tor@16862	263	int start = re.index;
tor@16862	264
tor@16862	265	// Try to find the line offset. re.index doesn't do the trick.
tor@16862	266	start = PythonUtils.getOffsetByLineCol(source, re.line - 1, 0); // -1: 0-based
tor@16862	267	int end = start;
tor@16862	268	if (re.charPositionInLine > 0) {
tor@16862	269	try {
tor@16862	270	end = GsfUtilities.getRowLastNonWhite(source, start) + 1;
tor@16862	271	start += re.charPositionInLine;
tor@16862	272	if (end < start) {
tor@16862	273	end = start;
tor@16862	274	}
tor@16862	275	} catch (BadLocationException ex) {
tor@16862	276	Exceptions.printStackTrace(ex);
tor@16862	277	end = start;
tor@16862	278	}
tor@16862	279	if (end == 0) {
tor@16862	280	end = start;
tor@16862	281	}
tor@16862	282	}
tor@16862	283
tor@16862	284	// Some errors have better offsets if we look at the token stream
tor@16862	285	if (re instanceof MismatchedTokenException) {
tor@16862	286	MismatchedTokenException m = (MismatchedTokenException)re;
tor@16862	287	if (m.token != null) {
tor@17030	288	if (m.token instanceof org.python.antlr.runtime.CommonToken) {
tor@17030	289	CommonToken token = (org.python.antlr.runtime.CommonToken)m.token;
tor@16862	290	start = token.getStartIndex();
tor@16862	291	end = token.getStopIndex();
tor@16862	292	}
tor@16862	293	}
tor@16862	294	}
tor@16862	295
tor@16862	296	if (start > source.length()) {
tor@16862	297	start = source.length();
tor@16862	298	end = start;
tor@16862	299	}
tor@16862	300
juniel_katarn@18215	301	errors.add(new DefaultError(null, message, null, file, start, end, Severity.ERROR));
tor@16862	302
jenselme@18295	303	// In order to avoid a StackOverflowError, the BaseRecognizer must be recreated.
jenselme@18295	304	// We must keep the names of the tokens to avoid a NullPointerException.
jenselme@18295	305	// See bz252630
jenselme@18295	306	final String[] tokenNames = br.getTokenNames();
jenselme@18273	307	br = new BaseRecognizer() {
jenselme@18273	308
jenselme@18273	309	@Override
jenselme@18273	310	public String getSourceName() {
jenselme@18273	311	return file.getName();
jenselme@18273	312	}
jenselme@18295	313
jenselme@18295	314	@Override
jenselme@18295	315	public String[] getTokenNames() {
jenselme@18295	316	return tokenNames;
jenselme@18295	317	}
jenselme@18273	318	};
jenselme@18295	319
jenselme@18295	320	super.reportError(br, re);
tor@16862	321	}
tor@16862	322	}
tor@16862	323	};
tor@17137	324
jenselme@18273	325	PythonLexer lexer = new PythonLexer(new ANTLRStringStream(sourceCode));
tor@17137	326	lexer.setErrorHandler(errorHandler);
tor@17137	327	CommonTokenStream tokens = new CommonTokenStream(lexer);
tor@17137	328	tokens.discardOffChannelTokens(true);
tor@17137	329	PythonTokenSource indentedSource = new PythonTokenSource(tokens, fileName);
juniel_katarn@18215	330	CommonTokenStream indentedTokens = new CommonTokenStream(indentedSource);
jenselme@18298	331	// Import line ending with a dot raise a NullPointerException in
jenselme@18298	332	// org.python.antlr.GrammarActions.makeDottedText called from parser.file_input
jenselme@18298	333	// sanitizeImportTokens will remove the dot token from the list of tokens in
jenselme@18298	334	// indentedTokens to avoid the bug and add an error at this file.
jenselme@18298	335	// See https://netbeans.org/bugzilla/show_bug.cgi?id=252356
jenselme@18298	336	sanitizeImportTokens(indentedTokens, errors, file);
vincentvdl@18286	337	org.python.antlr.PythonParser parser;
vincentvdl@18286	338	if (charset != null) {
vincentvdl@18286	339	parser = new org.python.antlr.PythonParser(indentedTokens, charset);
vincentvdl@18286	340	} else {
vincentvdl@18286	341	parser = new org.python.antlr.PythonParser(indentedTokens);
vincentvdl@18286	342	}
tor@17137	343	parser.setTreeAdaptor(new PythonTreeAdaptor());
tor@17137	344	parser.setErrorHandler(errorHandler);
tor@17137	345	org.python.antlr.PythonParser.file_input_return r = parser.file_input();
tor@17137	346	PythonTree t = (PythonTree)r.getTree();
juniel_katarn@18215	347	PythonParserResult result = new PythonParserResult(t, context.snapshot);
juniel_katarn@18215	348	result.setErrors(errors);
tor@16862	349
tor@16862	350	result.setSanitized(context.sanitized, context.sanitizedRange, context.sanitizedContents);
tor@16862	351	result.setSource(sourceCode);
tor@16862	352
tor@16862	353	return result;
tor@16862	354	} catch (ParseException pe) {
tor@16862	355	if (sanitizing == Sanitize.NONE) {
tor@16862	356	PythonParserResult sanitizedResult = sanitize(context, sanitizing);
tor@16862	357	if (sanitizedResult.isValid()) {
tor@16862	358	return sanitizedResult;
tor@16862	359	} else {
tor@16862	360	int offset = pe.index;
tor@16862	361	assert offset >= 0;
tor@16862	362	String desc = pe.getLocalizedMessage();
tor@16862	363	if (desc == null) {
tor@16862	364	desc = pe.getMessage();
tor@16862	365	}
juniel_katarn@18215	366	DefaultError error = new DefaultError(null /key/, desc, null, file, offset, offset, Severity.ERROR);
juniel_katarn@18215	367	PythonParserResult parserResult = new PythonParserResult(null, context.snapshot);
tor@16862	368	parserResult.addError(error);
tor@16862	369	for (Error e : errors) {
tor@16862	370	parserResult.addError(e);
tor@16862	371	}
tor@16862	372
tor@16862	373	return parserResult;
tor@16862	374	}
tor@16862	375	} else {
tor@16862	376	return sanitize(context, sanitizing);
tor@16862	377	}
jenselme@18429	378
jenselme@18429	379	} catch (PyException e) {
jenselme@18429	380	// This is issue 251705
jenselme@18429	381	Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
jenselme@18429	382	return new PythonParserResult(null, context.snapshot);
jenselme@18392	383	} catch (IllegalArgumentException e) {
jenselme@18392	384	Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
jenselme@18392	385	return new PythonParserResult(null, context.snapshot);
tor@16862	386	} catch (NullPointerException e) {
tor@16862	387	String fileName = "";
juniel_katarn@18215	388	if (file != null) {
juniel_katarn@18215	389	fileName = FileUtil.getFileDisplayName(file);
tor@16862	390	}
juniel_katarn@18215	391	e = Exceptions.attachMessage(e, "Was parsing " + fileName);
jenselme@18379	392	Logger.getLogger(this.getClass().getName()).log(Level.WARNING, e.getMessage());
juniel_katarn@18215	393	return new PythonParserResult(null, context.snapshot);
tor@16862	394	} catch (Throwable t) {
tor@16862	395	runtimeException = t;
tor@16862	396	StackTraceElement[] stackTrace = t.getStackTrace();
juniel_katarn@18221	397	if (stackTrace != null && stackTrace.length > 0 && stackTrace[0].getClassName().startsWith("org.python.antlr")) {//.runtime.tree.RewriteRuleElementStream")) {
tor@16862	398	// This is issue 150921
tor@16862	399	// Don't bug user about it -- we already know
tor@16862	400	Logger.getLogger(this.getClass().getName()).log(Level.FINE, "Encountered issue #150921", t);
tor@16862	401	} else {
juniel_katarn@18215	402	t = Exceptions.attachMessage(t, "Was parsing " + FileUtil.getFileDisplayName(file));
tor@16862	403	Exceptions.printStackTrace(t);
tor@16862	404	}
juniel_katarn@18215	405	return new PythonParserResult(null, context.snapshot);
tor@16862	406	}
tor@16862	407	}
tor@16862	408
jenselme@18298	409	private void sanitizeImportTokens(CommonTokenStream indentedTokens, List errors, FileObject file) {
jenselme@18298	410	List tokens = indentedTokens.getTokens();
jenselme@18298	411	List<CommonToken> tokensToRemove = new ArrayList<>();
jenselme@18298	412	int i = 0;
jenselme@18298	413	while (i < tokens.size()) {
jenselme@18298	414	CommonToken importToken = (CommonToken)tokens.get(i);
jenselme@18298	415	if ("import".equals(importToken.getText()) \|\| "from".equals(importToken.getText())) {
jenselme@18298	416	// sanitizeDotTokens return the index of the token that starts the next line
jenselme@18298	417	i = sanitizeDotTokens(tokens, tokensToRemove, importToken, i + 1, errors, file);
jenselme@18298	418	} else {
jenselme@18298	419	i++;
jenselme@18298	420	}
jenselme@18298	421	}
jenselme@18298	422
jenselme@18298	423	for (CommonToken token : tokensToRemove) {
jenselme@18298	424	tokens.remove(token);
jenselme@18298	425	}
jenselme@18298	426	}
jenselme@18298	427
jenselme@18298	428	private int sanitizeDotTokens(List tokens, List tokensToRemove, CommonToken importToken,
jenselme@18298	429	int startIndex, List errors, FileObject file) {
jenselme@18298	430	for (int j = startIndex; j < tokens.size() - 1; j++) {
jenselme@18298	431	CommonToken dotToken = (CommonToken)tokens.get(j);
jenselme@18298	432	CommonToken nextToken = (CommonToken)tokens.get(j + 1);
jenselme@18298	433	if (".".equals(dotToken.getText())) {
jenselme@18298	434	if (nextToken.getText().startsWith("\n")) {
jenselme@18298	435	tokensToRemove.add(dotToken);
jenselme@18298	436	String rawTokenText;
jenselme@18298	437	if (nextToken.getText().startsWith("\n")) {
jenselme@18298	438	rawTokenText = "\\n";
jenselme@18298	439	} else {
jenselme@18298	440	rawTokenText = " ";
jenselme@18298	441	}
jenselme@18298	442	errors.add(
jenselme@18298	443	new DefaultError(null, "Mismatch input '.' expecting NAME\nMissing NAME at '" + rawTokenText + "'",
jenselme@18298	444	null, file, importToken.getStartIndex(), dotToken.getStopIndex(), Severity.ERROR));
jenselme@18298	445	}
jenselme@18298	446	} else if ("\n".equals(nextToken.getText())) { // End of line, must continue looping from external loop
jenselme@18298	447	return j + 1;
jenselme@18298	448	}
jenselme@18298	449	}
jenselme@18298	450
jenselme@18298	451	return startIndex;
jenselme@18298	452	}
jenselme@18298	453
tor@16862	454	private static String asString(CharSequence sequence) {
tor@16862	455	if (sequence instanceof String) {
tor@16862	456	return (String)sequence;
tor@16862	457	} else {
tor@16862	458	return sequence.toString();
tor@16862	459	}
tor@16862	460	}
tor@16862	461
tor@16862	462
tor@16862	463	@SuppressWarnings("fallthrough")
juniel_katarn@18215	464	private PythonParserResult sanitize(final Context context, final Sanitize sanitizing) {
tor@16862	465
tor@16862	466	switch (sanitizing) {
tor@16862	467	case NEVER:
juniel_katarn@18215	468	return new PythonParserResult(null, context.snapshot);
tor@16862	469
tor@17208	470	case NONE:
tor@17208	471	if (context.caretOffset != -1) {
tor@17208	472	return parse(context, Sanitize.EDITED_DOT);
tor@17208	473	}
tor@17208	474
tor@16862	475	case EDITED_DOT:
tor@16862	476	// We've tried removing whitespace around the edit location
tor@17208	477	// Fall through to try parsing with removing stuff around error location
tor@16862	478	// (Don't bother doing this if errorOffset==caretOffset since that would try the same
tor@16862	479	// source as EDITED_DOT which has no better chance of succeeding...)
tor@16862	480	if (context.errorOffset != -1 && context.errorOffset != context.caretOffset) {
tor@16862	481	return parse(context, Sanitize.ERROR_DOT);
tor@16862	482	}
tor@16862	483
tor@16862	484	// Fall through to try the next trick
tor@16862	485	case ERROR_DOT:
tor@16862	486
tor@16862	487	// We've tried removing dots - now try removing the whole line at the error position
tor@16862	488	if (context.errorOffset != -1) {
tor@16862	489	return parse(context, Sanitize.ERROR_LINE);
tor@16862	490	}
tor@16862	491
tor@16862	492	// Fall through to try the next trick
tor@16862	493	case ERROR_LINE:
tor@16862	494
tor@16862	495	// Messing with the error line didn't work - we could try "around" the error line
tor@16862	496	// but I'm not attempting that now.
tor@16862	497	// Finally try removing the whole line around the user editing position
tor@16862	498	// (which could be far from where the error is showing up - but if you're typing
tor@16862	499	// say a new "def" statement in a class, this will show up as an error on a mismatched
tor@16862	500	// "end" statement rather than here
tor@16862	501	if (context.caretOffset != -1) {
tor@16862	502	return parse(context, Sanitize.EDITED_LINE);
tor@16862	503	}
tor@16862	504
tor@16862	505	// Fall through for default handling
tor@16862	506	case EDITED_LINE:
tor@16862	507	default:
tor@16862	508	// We're out of tricks - just return the failed parse result
juniel_katarn@18215	509	return new PythonParserResult(null, context.snapshot);
tor@16862	510	}
tor@16862	511	}
tor@16862	512
tor@16862	513	/**
tor@16862	514	* Try cleaning up the source buffer around the current offset to increase
tor@16862	515	* likelihood of parse success. Initially this method had a lot of
tor@16862	516	* logic to determine whether a parse was likely to fail (e.g. invoking
tor@16862	517	* the isEndMissing method from bracket completion etc.).
tor@16862	518	* However, I am now trying a parse with the real source first, and then
tor@16862	519	* only if that fails do I try parsing with sanitized source. Therefore,
tor@16862	520	* this method has to be less conservative in ripping out code since it
tor@16862	521	* will only be used when the regular source is failing.
tor@16862	522	*
tor@16862	523	* @todo Automatically close current statement by inserting ";"
tor@16862	524	* @todo Handle sanitizing "new ^" from parse errors
tor@16862	525	* @todo Replace "end" insertion fix with "}" insertion
tor@16862	526	*/
tor@16862	527	private boolean sanitizeSource(Context context, Sanitize sanitizing) {
tor@16862	528	int offset = context.caretOffset;
tor@16862	529
tor@16862	530	// Let caretOffset represent the offset of the portion of the buffer we'll be operating on
tor@16862	531	if ((sanitizing == Sanitize.ERROR_DOT) \|\| (sanitizing == Sanitize.ERROR_LINE)) {
tor@16862	532	offset = context.errorOffset;
tor@16862	533	}
tor@16862	534
tor@16862	535	// Don't attempt cleaning up the source if we don't have the buffer position we need
tor@16862	536	if (offset == -1) {
tor@16862	537	return false;
tor@16862	538	}
tor@16862	539
tor@16862	540	// The user might be editing around the given caretOffset.
tor@16862	541	// See if it looks modified
tor@16862	542	// Insert an end statement? Insert a } marker?
tor@16862	543	String doc = context.source;
tor@16862	544	if (offset > doc.length()) {
tor@16862	545	return false;
tor@16862	546	}
tor@16862	547
tor@16862	548	try {
tor@16862	549	// Sometimes the offset shows up on the next line
tor@16862	550	if (GsfUtilities.isRowEmpty(doc, offset) \|\| GsfUtilities.isRowWhite(doc, offset)) {
tor@16862	551	offset = GsfUtilities.getRowStart(doc, offset) - 1;
tor@16862	552	if (offset < 0) {
tor@16862	553	offset = 0;
tor@16862	554	}
tor@16862	555	}
tor@16862	556
tor@16862	557	if (!(GsfUtilities.isRowEmpty(doc, offset) \|\| GsfUtilities.isRowWhite(doc, offset))) {
tor@16862	558	if ((sanitizing == Sanitize.EDITED_LINE) \|\| (sanitizing == Sanitize.ERROR_LINE)) {
tor@16862	559	// See if I should try to remove the current line, since it has text on it.
tor@16862	560	int lineEnd = GsfUtilities.getRowLastNonWhite(doc, offset);
tor@16862	561
tor@16862	562	if (lineEnd != -1) {
tor@16862	563	lineEnd++; // lineEnd is exclusive, not inclusive
tor@16862	564	StringBuilder sb = new StringBuilder(doc.length());
tor@16862	565	int lineStart = GsfUtilities.getRowStart(doc, offset);
tor@16862	566	if (lineEnd >= lineStart + 2) {
tor@16862	567	sb.append(doc.substring(0, lineStart));
tor@16862	568	sb.append("//");
tor@16862	569	int rest = lineStart + 2;
tor@16862	570	if (rest < doc.length()) {
tor@16862	571	sb.append(doc.substring(rest, doc.length()));
tor@16862	572	}
tor@16862	573	} else {
tor@16862	574	// A line with just one character - can't replace with a comment
tor@16862	575	// Just replace the char with a space
tor@16862	576	sb.append(doc.substring(0, lineStart));
tor@16862	577	sb.append(" ");
tor@16862	578	int rest = lineStart + 1;
tor@16862	579	if (rest < doc.length()) {
tor@16862	580	sb.append(doc.substring(rest, doc.length()));
tor@16862	581	}
tor@16862	582
tor@16862	583	}
tor@16862	584
tor@16862	585	assert sb.length() == doc.length();
tor@16862	586
tor@16862	587	context.sanitizedRange = new OffsetRange(lineStart, lineEnd);
tor@16862	588	context.sanitizedSource = sb.toString();
tor@16862	589	context.sanitizedContents = doc.substring(lineStart, lineEnd);
tor@16862	590	return true;
tor@16862	591	}
tor@16862	592	} else {
tor@16862	593	assert sanitizing == Sanitize.ERROR_DOT \|\| sanitizing == Sanitize.EDITED_DOT;
tor@16862	594	// Try nuking dots/colons from this line
tor@16862	595	// See if I should try to remove the current line, since it has text on it.
tor@16862	596	int lineStart = GsfUtilities.getRowStart(doc, offset);
tor@16862	597	int lineEnd = offset - 1;
tor@16862	598	while (lineEnd >= lineStart && lineEnd < doc.length()) {
tor@16862	599	if (!Character.isWhitespace(doc.charAt(lineEnd))) {
tor@16862	600	break;
tor@16862	601	}
tor@16862	602	lineEnd--;
tor@16862	603	}
tor@16862	604	if (lineEnd > lineStart) {
tor@16862	605	StringBuilder sb = new StringBuilder(doc.length());
tor@16862	606	String line = doc.substring(lineStart, lineEnd + 1);
tor@16862	607	int removeChars = 0;
tor@16862	608	int removeEnd = lineEnd + 1;
tor@16862	609	boolean isLineEnd = GsfUtilities.getRowLastNonWhite(context.source, lineEnd) <= lineEnd;
tor@16862	610
tor@16862	611	if (line.endsWith(".")) { // NOI18N
tor@16862	612	removeChars = 1;
tor@16862	613	} else if (line.endsWith("(")) { // NOI18N
tor@16862	614	if (isLineEnd) {
tor@16862	615	removeChars = 1;
tor@16862	616	}
tor@16862	617	} else if (line.endsWith(",")) { // NOI18N removeChars = 1;
tor@16862	618	if (!isLineEnd) {
tor@16862	619	removeChars = 1;
tor@16862	620	}
tor@16862	621	} else if (line.endsWith(", ")) { // NOI18N
tor@16862	622	if (!isLineEnd) {
tor@16862	623	removeChars = 2;
tor@16862	624	}
tor@16862	625	} else if (line.endsWith(",)")) { // NOI18N
tor@16862	626	// Handle lone comma in parameter list - e.g.
tor@16862	627	// type "foo(a," -> you end up with "foo(a,\|)" which doesn't parse - but
tor@16862	628	// the line ends with ")", not "," !
tor@16862	629	// Just remove the comma
tor@16862	630	removeChars = 1;
tor@16862	631	removeEnd--;
tor@16862	632	} else if (line.endsWith(", )")) { // NOI18N
tor@16862	633	// Just remove the comma
tor@16862	634	removeChars = 1;
tor@16862	635	removeEnd -= 2;
tor@16862	636	} else if (line.endsWith(" def") && isLineEnd) { // NOI18N
tor@16862	637	removeChars = 3;
tor@16862	638	} else {
tor@16862	639	// // Make sure the line doesn't end with one of the JavaScript keywords
tor@16862	640	// // (new, do, etc) - we can't handle that!
tor@16862	641	// for (String keyword : PythonUtils.PYTHON_KEYWORDS) { // reserved words are okay
tor@16862	642	// if (line.endsWith(keyword)) {
tor@16862	643	// if ("print".equals(keyword)) { // NOI18N
tor@16862	644	// // Only remove the keyword if it's the end of the line. Otherwise,
tor@16862	645	// // it could have just been typed in front of something (e.g. inserted a print) and we don't
tor@16862	646	// // want to confuse the parser with "va foo" instead of "var foo"
tor@16862	647	// if (!isLineEnd) {
tor@16862	648	// continue;
tor@16862	649	// }
tor@16862	650	// }
tor@16862	651	// removeChars = 1;
tor@16862	652	// break;
tor@16862	653	// }
tor@16862	654	// }
tor@16862	655	}
tor@16862	656
tor@16862	657	if (removeChars == 0) {
tor@16862	658	return false;
tor@16862	659	}
tor@16862	660
tor@16862	661	int removeStart = removeEnd - removeChars;
tor@16862	662
tor@16862	663	sb.append(doc.substring(0, removeStart));
tor@16862	664
tor@16862	665	for (int i = 0; i < removeChars; i++) {
tor@16862	666	sb.append(' ');
tor@16862	667	}
tor@16862	668
tor@16862	669	if (removeEnd < doc.length()) {
tor@16862	670	sb.append(doc.substring(removeEnd, doc.length()));
tor@16862	671	}
tor@16862	672	assert sb.length() == doc.length();
tor@16862	673
tor@16862	674	context.sanitizedRange = new OffsetRange(removeStart, removeEnd);
tor@16862	675	context.sanitizedSource = sb.toString();
tor@16862	676	context.sanitizedContents = doc.substring(removeStart, removeEnd);
tor@16862	677	return true;
tor@16862	678	}
tor@16862	679	}
tor@16862	680	}
tor@16862	681	} catch (BadLocationException ble) {
tor@16862	682	Exceptions.printStackTrace(ble);
tor@16862	683	}
tor@16862	684
tor@16862	685	return false;
tor@16862	686	}
tor@16862	687
tor@17173	688	private static int findLineOffset(String source, int line) {
tor@17173	689	int offset = -1;
tor@17173	690	for (int i = 0; i < line; i++) {
tor@17173	691	offset = source.indexOf("\n", offset+1);
tor@17173	692	if (offset == -1) {
tor@17173	693	return source.length();
tor@17173	694	}
tor@17173	695	}
tor@17173	696
tor@17173	697	return Math.min(source.length(), offset+1);
tor@17173	698	}
tor@17173	699
/**
 * The ways in which the parser may attempt to sanitize the input buffer.
 */
public enum Sanitize {
    /** Parse the current file accurately only; do not try any heuristics. */
    NEVER,

    /** Do not sanitize at all. */
    NONE,

    /** Attempt to remove the trailing {@code .} or {@code ::} on the caret line. */
    EDITED_DOT,

    /**
     * Attempt to remove the trailing {@code .} or {@code ::} at the error
     * position, or on the prior line, or on the caret line.
     */
    ERROR_DOT,

    /** Attempt to cut out the line that contains the error. */
    ERROR_LINE,

    /** Attempt to cut out the line currently being edited, if it is known. */
    EDITED_LINE
}
tor@16862	716
juniel_katarn@18215	717	/** Sanitize context */
tor@16862	718	public static class Context {
juniel_katarn@18215	719	private FileObject file;
juniel_katarn@18215	720	// private ParseListener listener;
tor@16862	721	private int errorOffset;
tor@16862	722	private String source;
tor@16862	723	private String sanitizedSource;
tor@16862	724	private OffsetRange sanitizedRange = OffsetRange.NONE;
tor@16862	725	private String sanitizedContents;
tor@16862	726	private int caretOffset;
tor@16862	727	private Sanitize sanitized = Sanitize.NONE;
juniel_katarn@18215	728	// private TranslatedSource translatedSource;
juniel_katarn@18215	729	// private Parser.Job job;
juniel_katarn@18215	730	private Snapshot snapshot;
juniel_katarn@18215	731	private Task task;
juniel_katarn@18215	732	private SourceModificationEvent event;
juniel_katarn@18215	733	//
juniel_katarn@18215	734	// public Context(ParserFile parserFile, ParseListener listener, String source, int caretOffset, TranslatedSource translatedSource, Parser.Job job) {
juniel_katarn@18215	735	// this.file = parserFile;
juniel_katarn@18215	736	// this.listener = listener;
juniel_katarn@18215	737	// this.source = source;
juniel_katarn@18215	738	// this.caretOffset = caretOffset;
juniel_katarn@18215	739	// this.translatedSource = translatedSource;
juniel_katarn@18215	740	// this.job = job;
juniel_katarn@18215	741	//
juniel_katarn@18215	742	//
juniel_katarn@18215	743	// if (caretOffset != -1) {
juniel_katarn@18215	744	// sanitized = Sanitize.EDITED_DOT;
juniel_katarn@18215	745	// }
juniel_katarn@18215	746	// }
juniel_katarn@18215	747	//
juniel_katarn@18215	748	// @Override
juniel_katarn@18215	749	// public String toString() {
juniel_katarn@18215	750	// return "PythonParser.Context(" + file.toString() + ")"; // NOI18N
juniel_katarn@18215	751	// }
juniel_katarn@18215	752	//
juniel_katarn@18215	753	// public OffsetRange getSanitizedRange() {
juniel_katarn@18215	754	// return sanitizedRange;
juniel_katarn@18215	755	// }
juniel_katarn@18215	756	//
juniel_katarn@18215	757	// public Sanitize getSanitized() {
juniel_katarn@18215	758	// return sanitized;
juniel_katarn@18215	759	// }
juniel_katarn@18215	760	//
juniel_katarn@18215	761	// public String getSanitizedSource() {
juniel_katarn@18215	762	// return sanitizedSource;
juniel_katarn@18215	763	// }
juniel_katarn@18215	764	//
juniel_katarn@18215	765	// public int getErrorOffset() {
juniel_katarn@18215	766	// return errorOffset;
juniel_katarn@18215	767	// }
tor@16862	768	}
tor@16862	769	}

author	Julien Enselme <jenselme@netbeans.org>
	Tue, 27 Jun 2017 21:26:18 +0200
changeset 18429	517409415907
parent 18392	6ccd27d3f884
permissions	-rw-r--r--