2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4 * Copyright 2010 Oracle and/or its affiliates. All rights reserved.
6 * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
7 * Other names may be trademarks of their respective owners.
9 * The contents of this file are subject to the terms of either the GNU
10 * General Public License Version 2 only ("GPL") or the Common
11 * Development and Distribution License("CDDL") (collectively, the
12 * "License"). You may not use this file except in compliance with the
13 * License. You can obtain a copy of the License at
14 * http://www.netbeans.org/cddl-gplv2.html
15 * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
16 * specific language governing permissions and limitations under the
17 * License. When distributing the software, include this License Header
18 * Notice in each file and include the License file at
19 * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
20 * particular file as subject to the "Classpath" exception as provided
21 * by Oracle in the GPL Version 2 section of the License file that
22 * accompanied this code. If applicable, add the following below the
23 * License Header, with the fields enclosed by brackets [] replaced by
24 * your own identifying information:
25 * "Portions Copyrighted [year] [name of copyright owner]"
27 * If you wish your version of this file to be governed by only the CDDL
28 * or only the GPL Version 2, indicate your decision by adding
29 * "[Contributor] elects to include this software in this distribution
30 * under the [CDDL or GPL Version 2] license." If you do not indicate a
31 * single choice of license, a recipient has the option to distribute
32 * your version of this file under either the CDDL, the GPL Version 2 or
33 * to extend the choice of license to its licensees as provided above.
34 * However, if you add GPL Version 2 code and therefore, elected the GPL
35 * Version 2 license, then the option applies only if the new code is
36 * made subject to such option by the copyright holder.
40 * Portions Copyrighted 2008 Sun Microsystems, Inc.
42 package org.netbeans.modules.python.source;
44 import java.util.ArrayList;
45 import java.util.Collections;
46 import java.util.HashMap;
47 import java.util.List;
49 import javax.swing.text.BadLocationException;
50 import javax.swing.text.Document;
51 import javax.swing.text.JTextComponent;
52 import org.netbeans.api.editor.EditorRegistry;
53 import org.netbeans.api.lexer.TokenSequence;
54 import org.netbeans.modules.python.source.lexer.PythonLexerUtils;
55 import org.netbeans.modules.python.source.lexer.PythonTokenId;
56 import org.netbeans.api.lexer.Token;
57 import org.netbeans.api.lexer.TokenId;
58 import org.netbeans.api.lexer.TokenUtilities;
59 import org.netbeans.editor.BaseDocument;
60 import org.netbeans.editor.Utilities;
61 import org.netbeans.modules.csl.api.EditList;
62 import org.netbeans.modules.csl.api.Formatter;
63 import org.netbeans.modules.csl.spi.GsfUtilities;
64 import org.netbeans.modules.csl.spi.ParserResult;
65 import org.netbeans.modules.editor.indent.api.IndentUtils;
66 import org.netbeans.modules.editor.indent.spi.Context;
67 import org.openide.util.Exceptions;
70 * Implement formatting for Python. Since there are no {}'s etc. to uniquely
71 * impose indentation on Python, this formatter really just tries to enforce
72 * spaces-versus-tabs, and indentation width. E.g. it uses the existing indentation
73 * to determine whether the next line should be indented more than, the same as or less than the
74 * current line and then enforces the current space and indent size settings.
76 * @todo Implement pretty printing: inserting newlines, removing spaces inside
77 * parentheses, etc. See the recommendations in
78 * http://www.python.org/dev/peps/pep-0008/
79 * Do import statement cleanup too.
80 * @todo Line up comment lines (# as a suffix, continued from a previous line)
81 * @todo Handle continuation lines with extra indentation
82 * @todo Line up list initializations better?
86 public class PythonFormatter implements Formatter {
87 private int indentSize;
88 private int continuationIndentSize;
89 private CodeStyle codeStyle;
// No-argument constructor. No statements are visible in this view, so codeStyle is presumably
// left unset here; the five-argument reformat() looks up CodeStyle.getDefault(...) when
// codeStyle is still null.
91 public PythonFormatter() {
// Creates a formatter that uses the supplied code style. The style is consulted later for the
// import-formatting switch (reformat) and for the spacing options (cleanup).
94 public PythonFormatter(CodeStyle codeStyle) {
95 this.codeStyle = codeStyle;
// Reformats the range described by the indent context: reads the document and the start/end
// offsets from it and delegates to the five-argument reformat() below.
// compilationInfo is cast to PythonParserResult without an instanceof check.
99 public void reformat(Context context, ParserResult compilationInfo) {
101 // No AST pretty printing yet
102 // I should offer to go and do space insert/removal around commas, parentheses, etc.
103 // as well as balancing long argument lists across lines
104 Document document = context.document();
105 int startOffset = context.startOffset();
106 int endOffset = context.endOffset();
108 reformat(context, document, startOffset, endOffset, (PythonParserResult) compilationInfo);
// Full reformat of [startOffset, endOffset] in the given document. Visible steps, in order:
//   1. fall back to the document's default CodeStyle when none was supplied,
//   2. optionally clean up imports via ImportManager,
//   3. run the token-level whitespace cleanup(),
//   4. reindent the range.
// NOTE(review): closing braces are not visible in this view, so the exact nesting of the
// steps (e.g. whether reindent() runs when codeStyle is null) should be confirmed in the file.
111 public void reformat(final Context context, Document document, int startOffset, int endOffset, PythonParserResult info) {
// Lazily resolve the code style from the context's document; the result is cached in the field.
112 if (codeStyle == null) {
113 codeStyle = CodeStyle.getDefault(context.document());
// Import cleanup needs a parser result with a parse tree, must be enabled in the code style,
// and is not attempted while a code template is being edited in the document.
115 if (info != null && codeStyle != null && codeStyle.formatImports() && !GsfUtilities.isCodeTemplateEditing(document) &&
116 PythonAstUtils.getParseResult(info) != null) {
117 new ImportManager(info, (BaseDocument)document, codeStyle).cleanup(null, startOffset, endOffset, false);
120 if (codeStyle != null) {
121 cleanup(document, info, startOffset, endOffset);
124 reindent(context, document, startOffset, endOffset);
// Reports whether formatting needs a parser result. In the visible code the answer is taken
// from the formatImports() setting of the default CodeStyle for the document shown in the
// last focused editor component.
// NOTE(review): the value returned when no component has had focus (target == null) is not
// visible in this view.
128 public boolean needsParserResult() {
129 // if (SourceUtils.isScanInProgress()) {
133 // If we're going to format imports, then yes, we need the parser result
134 JTextComponent target = EditorRegistry.lastFocusedComponent();
135 if (target != null) {
136 CodeStyle cs = CodeStyle.getDefault(target.getDocument());
// A missing code style is treated as "imports are not formatted".
137 return cs != null ? cs.formatImports() : false;
// Returns the indentation size used by this formatter.
// NOTE(review): the return statement is not visible in this view; the comment below refers to
// the 4-space recommendation in PEP 8.
143 public int indentSize() {
144 // 4 spaces: See http://www.python.org/dev/peps/pep-0008/
// Returns the hanging indent size; presumably the extra indent for continued lines — TODO confirm.
// NOTE(review): the method body is not visible in this view.
149 public int hangingIndentSize() {
153 // Challenge: Two inconsistently formatted
154 // Idea: Given a list of offsets and indentation, produce a graph (or recurse) where I mark all
155 // siblings the exact same level
158 // Find smallest indent: That's the top level
159 // Build a graph? Each indent line.
// Reindents the range described by the indent context: reads the document and the start/end
// offsets from it and delegates to the four-argument reindent() below.
162 public void reindent(final Context context) {
163 Document document = context.document();
164 int startOffset = context.startOffset();
165 int endOffset = context.endOffset();
167 reindent(context, document, startOffset, endOffset);
// Normalizes the indentation of the non-blank lines between startOffset and endOffset.
//
// Visible phases:
//   1. Scan: walk the rows of the range, recording each non-blank row's start offset and its
//      current indent, while tracking open left parentheses from the token sequence.
//   2. Level assignment: turn the recorded indents into nesting levels using a stack of
//      parent indentations.
//   3. Apply: inside doc.runAtomic, compute the target indent per line (level * indentSize,
//      adjusted by relativeShift; paren-aligned lines are computed from the paren's column) and
//      write changed indents through context.modifyIndent, last line first.
//
// context may be null, in which case no indentation is written (see the null check before
// modifyIndent). document is cast to BaseDocument without a type check.
// NOTE(review): several control-flow lines of this method are not visible in this view; the
// comments below describe only what the visible statements do.
170 @SuppressWarnings("deprecation") // For doc.getFormatter()
171 public void reindent(final Context context, Document document, int startOffset, int endOffset) {
// Clamp the range to the document: endOffset <= document length, startOffset <= endOffset.
172 endOffset = Math.min(endOffset, document.getLength());
173 startOffset = Math.min(startOffset, endOffset);
// Both the regular and the continuation indent size come from the document's indent level size.
175 continuationIndentSize = indentSize = IndentUtils.indentLevelSize(document);
178 final BaseDocument doc = (BaseDocument)document;
180 // Plan: Go through the lines, one by one, and compute the indentation levels relative to each other,
181 // then normalize them (except inside strings), then apply!!
182 // Also track whether we are used for newline indentation and if so, do smart bracket stuff
184 // Row-start offset of every non-blank line visited in the range; kept index-aligned with
185 // the indentation list below.
186 final List<Integer> offsets = new ArrayList<>();
188 // Current indentation for the given line. -1 means that it should be left alone (e.g.
189 // we don't mess with multiline string literals). Other negative numbers are negated offsets
190 // pointing at a particular left parenthesis that this line should be aligned with
191 final List<Integer> indentation = new ArrayList<>();
// Offsets of left parentheses seen so far that have not been popped by a right parenthesis.
192 final List<Integer> lParenOffsets = new ArrayList<>();
195 doc.readLock(); // For token hierarchy usage
197 TokenSequence<? extends PythonTokenId> ts = PythonLexerUtils.getPythonSequence(doc, startOffset);
// Start at the beginning of the row containing startOffset and advance one row per iteration.
199 int currentOffset = Utilities.getRowStart(doc, startOffset);
201 while (currentOffset <= endOffset) {
// Empty and whitespace-only rows are not recorded.
202 if (!(Utilities.isRowEmpty(doc, currentOffset) || Utilities.isRowWhite(doc, currentOffset))) {
203 Token<? extends PythonTokenId> token = PythonLexerUtils.getToken(doc, currentOffset);
204 int indent = GsfUtilities.getLineIndent(doc, currentOffset);
// Rows that start inside a string literal get special handling (branch body not visible here).
206 if (token.id() == PythonTokenId.STRING_LITERAL || token.id() == PythonTokenId.STRING_END) {
// Ordinary row: remember its current indent and its row-start offset.
213 indentation.add(indent);
214 offsets.add(currentOffset);
// Row inside unbalanced parentheses: store the negated offset of the left paren found at
// index (size - balance) so the apply phase can align this row with it.
216 assert balance <= lParenOffsets.size();
217 int parenOffset = lParenOffsets.get(lParenOffsets.size()-balance);
218 indentation.add(-parenOffset);
219 offsets.add(currentOffset);
224 // TODO - look up the tokens to make sure we don't have a problem with literal nodes
226 if (currentOffset > doc.getLength()) {
230 // Update the line balance
231 int begin = Utilities.getRowStart(doc, currentOffset);
232 int end = Utilities.getRowEnd(doc, currentOffset);
// Walk the tokens of this row: push each '(' offset, pop the most recent one on ')'.
238 Token<? extends PythonTokenId> token = ts.token();
239 TokenId id = token.id();
241 if (id == PythonTokenId.LPAREN) {
243 lParenOffsets.add(ts.offset());
244 } else if (id == PythonTokenId.RPAREN) {
// Guard against an unmatched ')' so the pop never runs on an empty list.
246 if (!lParenOffsets.isEmpty()) {
247 lParenOffsets.remove(lParenOffsets.size()-1);
250 } while (ts.moveNext() && (ts.offset() <= end));
// Advance to the first offset after the end of this row.
253 currentOffset = Utilities.getRowEnd(doc, currentOffset) + 1;
// Nothing recorded means there is nothing to reindent.
260 if (offsets.size() == 0) {
264 assert indentation.size() == offsets.size();
// offsetToLevel: row-start offset -> nesting level; offsetToIndex: row-start offset -> index
// into the offsets/indentation lists.
266 final Map<Integer, Integer> offsetToLevel = new HashMap<>();
267 final Map<Integer,Integer> offsetToIndex = new HashMap<>();
// Stack of enclosing indentation widths, used to pop back out when a row outdents.
268 List<Integer> parentIndentations = new ArrayList<>();
269 int currentParentIndent = -1;
270 int currentLevel = -1;
272 int firstIndent = indentation.get(0);
// Sorted copy of the recorded indents; negative markers sort first.
273 List<Integer> sorted = new ArrayList<>(indentation);
274 Collections.sort(sorted);
275 // Attempt to shift the computed indentation to fit the right indentation levels
276 // that are currently in the file?
// Find the first non-negative entry, i.e. the smallest real indent in the range.
278 for (; firstNonNeg < sorted.size(); firstNonNeg++) {
279 if (sorted.get(firstNonNeg) >= 0) {
283 boolean shiftToCurrent = true;
// The first row is indented deeper than the shallowest row in the range.
284 if (firstIndent > sorted.get(firstNonNeg)) {
285 shiftToCurrent = false;
286 // The start is not at the top level... e.g. we have something like
290 // (e.g. we are formatting a fragment of code which doesn't include
291 // the top). Here we need to find the "true" top levels, so we
292 // push levels on to the stack
294 for (int indent : sorted) {
295 if (prev == indent) {
// Every distinct indent shallower than the first row becomes a parent level.
299 if (indent < firstIndent) {
300 parentIndentations.add(currentParentIndent);
301 currentParentIndent = indent;
310 // TODO: What if I start in the middle of an expression such that I outdent
311 // more than I indent? I have to build up the index levels if necessary
312 // Go count popping levels
// Assign a level to every recorded row, in document order.
314 for (int i = 0, n = offsets.size(); i < n; i++) {
315 int offset = offsets.get(i);
316 int indent = indentation.get(i);
319 offsetToLevel.put(offset, -1);
322 offsetToIndex.put(offset, i);
325 // Want to keep everything the same as the prev, plus delta
// Deeper than the current parent: push the parent and descend.
326 } else if (indent > currentParentIndent) {
329 parentIndentations.add(currentParentIndent);
330 currentParentIndent = indent;
// Shallower than the current parent: pop parents until we are no deeper than this row.
331 } else if (indent < currentParentIndent) {
332 while (currentParentIndent > indent) {
334 if (parentIndentations.size() > 0) {
335 currentParentIndent = parentIndentations.remove(parentIndentations.size() - 1);
337 currentParentIndent = indent;
342 offsetToLevel.put(offset, currentLevel);
345 // Compute relative shift
// relativeShift is the difference between the first row's computed and existing indent; it is
// subtracted from every computed indent below, and is 0 when shiftToCurrent is false.
346 int firstLineIndent = indentation.get(0);
347 int firstLineLevel = offsetToLevel.get(offsets.get(0));
348 int computedIndent = firstLineLevel * indentSize;
349 final int relativeShift = shiftToCurrent ? computedIndent - firstLineIndent : 0;
// All document modifications happen inside a single atomic document operation.
351 doc.runAtomic(new Runnable() {
// Target indent per recorded row; negative entries are skipped when applying.
354 int[] computedIndents = new int[offsets.size()];
355 // Process backwards so I don't have to worry about updating offsets affected by
356 // indentation changes
357 for (int i = offsets.size() - 1; i >= 0; i--) {
358 int indent = indentation.get(i);
364 int offset = offsets.get(i);
365 int level = offsetToLevel.get(offset);
366 int computedIndent = level * indentSize - relativeShift;
367 if (computedIndent < 0) {
370 computedIndents[i] =computedIndent;
372 computedIndents[i] = -1;
// Second pass: resolve rows that must be aligned with a left parenthesis.
376 for (int i = offsets.size() - 1; i >= 0; i--) {
377 int indent = indentation.get(i);
380 // Negative offset pointing to a left parenthesis we should align with
381 int parenOffset = -indent;
382 int lineStart = Utilities.getRowStart(doc, parenOffset);
383 if (lineStart != -1) {
// NOTE(review): offsetToIndex.get(lineStart) is unboxed directly and would throw if the
// paren's row has no entry in the map — confirm every such row is always recorded.
384 int parenLineIndent = computedIndents[offsetToIndex.get(lineStart)];
385 assert parenLineIndent >= 0;
386 int textBegin = Utilities.getRowFirstNonWhite(doc, lineStart);
387 assert textBegin != -1;
388 // Indent to new indentation + text up to paren plus the paren itself
389 int newIndent = parenLineIndent + (parenOffset-textBegin) + 1;
390 computedIndents[i] = newIndent;
392 } catch (BadLocationException ble) {
393 Exceptions.printStackTrace(ble);
398 // Process backwards so I don't have to worry about updating offsets affected by
399 // indentation changes
400 for (int i = offsets.size() - 1; i >= 0; i--) {
401 int indent = indentation.get(i);
406 int offset = offsets.get(i);
407 int computedIndent = computedIndents[i];
408 if (computedIndent < 0) {
// Only touch rows whose indent actually changes, and only when an indent context is available.
412 if (computedIndent != indent && context != null) {
414 context.modifyIndent(offset, computedIndent);
415 } catch (BadLocationException ex) {
416 Exceptions.printStackTrace(ex);
// Bad offsets during the scan are reported through Exceptions.printStackTrace, not rethrown.
422 } catch (BadLocationException ble) {
423 Exceptions.printStackTrace(ble);
// Returns true when offset is the position of the first non-whitespace character on its row,
// i.e. only whitespace (if anything) precedes it on that line. Callers in cleanup() use this to
// avoid deleting whitespace that is really the line's indentation.
// Throws BadLocationException as declared; presumably raised by the row lookup for an invalid offset.
427 private boolean isLinePrefix(BaseDocument doc, int offset) throws BadLocationException {
428 return Utilities.getRowFirstNonWhite(doc, offset) == offset;
431 private void cleanup(Document document, PythonParserResult info, int startOffset, int endOffset) {
432 BaseDocument doc = (BaseDocument)document;
433 final EditList edits = new EditList(doc);
435 doc.readLock(); // For token hierarchy usage
437 TokenSequence<? extends PythonTokenId> ts = PythonLexerUtils.getPythonSequence(doc, startOffset);
442 ts.move(startOffset);
447 // Control whether I collapse spaces to a single space, or just ensure there is at least one
448 // "None", "1", "At least 1", "Leave Alone"
450 // TODO: Insert and remove needed or unnecessary parentheses!
451 // TODO: Alignment! Especially of trailing line comments on adjacent lines!
452 // TODO: Collapse blank newlines!
453 boolean addSpaceAroundOperators = true;
454 boolean removeSpaceInsideParens = true; // also applies to braces and brackets
455 boolean addSpaceAfterComma = true;
456 // boolean spaceArondParens = false;
457 // boolean spaceBeforeArgs = false; // before parentheses in a call
458 boolean removeSpaceBeforeSep = true; // before comma, semicolon or colon
459 // boolean alignAssignments = false; // Only one space around assignments
460 boolean removeSpaceInParamAssign = true; // Around assignment in parameter list, e.g.
461 boolean collapseSpaces = true;
462 //def complex(real, imag=0.0):
463 // return magic(r=real, i=imag)
464 if (codeStyle != null) {
465 addSpaceAroundOperators = codeStyle.addSpaceAroundOperators();
466 removeSpaceInsideParens = codeStyle.removeSpaceInsideParens();
467 addSpaceAfterComma = codeStyle.addSpaceAfterComma();
468 removeSpaceBeforeSep = codeStyle.removeSpaceBeforeSep();
469 removeSpaceInParamAssign = codeStyle.removeSpaceInParamAssign();
470 collapseSpaces = codeStyle.collapseSpaces();
473 // TODO - back up to the nearest function or class or beginning of the document to get the right
474 // parenthesis balance.
475 int parenBalance = 0;
477 Token<? extends PythonTokenId> prev = null;
478 Token<? extends PythonTokenId> token = null;
479 Token<? extends PythonTokenId> next = null;
485 tokenOffset = ts.offset();
488 nextOffset = ts.offset();
493 boolean prevRemoved = false;
494 boolean tokenRemoved = false;
495 boolean nextRemoved = false;
496 while (token != null) {
497 TokenId prevId = prev != null ? prev.id() : null;
498 TokenId id = token.id();
499 TokenId nextId = next != null ? next.id() : null;
501 if (id == PythonTokenId.LPAREN) {
503 } else if (id == PythonTokenId.RPAREN) {
507 if (removeSpaceInsideParens) {
508 if (id == PythonTokenId.LPAREN) {
509 if (nextId == PythonTokenId.WHITESPACE && !nextRemoved) {
510 edits.replace(nextOffset, next.length(), null, false, 0);
513 } else if (id == PythonTokenId.RPAREN) {
514 if (prevId == PythonTokenId.WHITESPACE && !prevRemoved && !isLinePrefix(doc, tokenOffset)) {
515 // I don't remove space in front of paren's at the beginning of the line; these might have
516 // been aligned with indented content above
517 edits.replace(prevOffset, prev.length(), null, false, 0);
520 } else if (id == PythonTokenId.LBRACKET) {
521 if (nextId == PythonTokenId.WHITESPACE && !nextRemoved) {
522 edits.replace(nextOffset, next.length(), null, false, 0);
525 } else if (id == PythonTokenId.RBRACKET) {
526 if (prevId == PythonTokenId.WHITESPACE && !prevRemoved && !isLinePrefix(doc, tokenOffset)) {
527 edits.replace(prevOffset, prev.length(), null, false, 0);
530 } else if (id == PythonTokenId.LBRACE) {
531 if (nextId == PythonTokenId.WHITESPACE && !nextRemoved) {
532 edits.replace(nextOffset, next.length(), null, false, 0);
535 } else if (id == PythonTokenId.RBRACE) {
536 if (prevId == PythonTokenId.WHITESPACE && !prevRemoved && !isLinePrefix(doc, tokenOffset)) {
537 edits.replace(prevOffset, prev.length(), null, false, 0);
543 if (addSpaceAfterComma) {
544 if (id == PythonTokenId.COMMA) {
545 if (collapseSpaces && nextId == PythonTokenId.WHITESPACE && next.length() > 1) {
546 edits.replace(nextOffset, next.length() - 1, null, false, 1); // NOI18N
547 } else if (next == null ||
548 (nextId != PythonTokenId.WHITESPACE && nextId != PythonTokenId.NEWLINE)) {
549 edits.replace(nextOffset, 0, " ", false, 1); // NOI18N
554 if (removeSpaceBeforeSep &&
555 (id == PythonTokenId.COMMA || id == PythonTokenId.COLON ||
556 (id == PythonTokenId.ANY_OPERATOR && TokenUtilities.equals(token.text(), ";"))) && // NOI18N
557 prevId == PythonTokenId.WHITESPACE && !prevRemoved && !isLinePrefix(doc, tokenOffset)) {
558 edits.replace(prevOffset, prev.length(), null, false, 2);
562 if (addSpaceAroundOperators && id == PythonTokenId.ANY_OPERATOR) {
563 CharSequence seq = token.text();
565 // These aren't binary, and ; isn't really an operator and has its own setting
566 if (!(TokenUtilities.equals(seq, "@") || // NOI18N
567 TokenUtilities.equals(seq, "`") || // NOI18N
568 TokenUtilities.equals(seq, ";"))) { // NOI18N
570 boolean insertSpace = true;
571 if (removeSpaceInParamAssign && TokenUtilities.equals(seq, "=")) { // NOI18N
572 // Special handling: keyword arguments should typically NOT
573 // have space inserted
574 if (parenBalance > 0) {
576 // Remove spaces around the =
577 if (prevId == PythonTokenId.WHITESPACE && !prevRemoved) {
578 edits.replace(prevOffset, prev.length(), null, false, 5); // NOI18N
581 if (nextId == PythonTokenId.WHITESPACE && !nextRemoved) {
582 edits.replace(nextOffset, next.length(), null, false, 6); // NOI18N
588 if (insertSpace && TokenUtilities.equals(seq, "-")/* && (nextId == PythonTokenId.FLOAT_LITERAL || nextId == PythonTokenId.INT_LITERAL)*/) {
589 // Leave -'s alone for now. The code is a little unclear on the difference between
590 // x-1 and =-1 etc. For numbers (floating and integer) the minus isn't part of the lexical token for the number;
591 // it's a separate operator. However, it's tricky to tell this apart from the binary subtraction, since it depends
592 // on what came before. For now play it safe and leave these alone.
593 // TODO - implement this properly.
597 if (insertSpace && TokenUtilities.equals(seq, "*")) { // NOI18N
598 // "*" in (*foo) doesn't mean multiplication; it's not a binary operator here,
600 if (prevId == PythonTokenId.COMMA || prevId == PythonTokenId.LPAREN) {
606 // Ensure that we have space on both sides
607 if (collapseSpaces && prevId == PythonTokenId.WHITESPACE && next.length() > 1 &&
608 !isLinePrefix(doc, tokenOffset)) {
609 edits.replace(prevOffset, prev.length() - 1, null, false, 1); // NOI18N
610 } else if (prevId != PythonTokenId.WHITESPACE) {
611 edits.replace(tokenOffset, 0, " ", false, 3); // NOI18N
614 if (collapseSpaces && nextId == PythonTokenId.WHITESPACE && next.length() > 1) {
615 edits.replace(nextOffset, next.length() - 1, null, false, 1); // NOI18N
616 } else if (nextId != PythonTokenId.WHITESPACE && nextId != PythonTokenId.NEWLINE) {
617 edits.replace(nextOffset, 0, " ", false, 4); // NOI18N
623 if (tokenOffset + token.length() >= endOffset) {
627 prevRemoved = tokenRemoved;
628 tokenRemoved = nextRemoved;
633 prevOffset = tokenOffset;
634 tokenOffset = nextOffset;
637 nextOffset = ts.offset();
642 } catch (BadLocationException ble) {
643 Exceptions.printStackTrace(ble);
648 doc.runAtomic(new Runnable() {