2 # pep8.py - Check Python source code formatting, according to PEP 8
3 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
4 # Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
5 # Copyright (C) 2014 Ian Lee <ianlee1521@gmail.com>
7 # Permission is hereby granted, free of charge, to any person
8 # obtaining a copy of this software and associated documentation files
9 # (the "Software"), to deal in the Software without restriction,
10 # including without limitation the rights to use, copy, modify, merge,
11 # publish, distribute, sublicense, and/or sell copies of the Software,
12 # and to permit persons to whom the Software is furnished to do so,
13 # subject to the following conditions:
15 # The above copyright notice and this permission notice shall be
16 # included in all copies or substantial portions of the Software.
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 Check Python source code formatting, according to PEP 8.
30 For usage and a list of options, try this:
33 This program and its regression test suite live here:
34 http://github.com/jcrocholl/pep8
36 Groups of errors and warnings:
48 from __future__ import with_statement
57 from optparse import OptionParser
58 from fnmatch import fnmatch
60 from configparser import RawConfigParser
61 from io import TextIOWrapper
63 from ConfigParser import RawConfigParser
__version__ = '1.6.0a0'

DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
DEFAULT_IGNORE = 'E123,E226,E24,E704'
# Per-user configuration file: %USERPROFILE%\.pep8 on Windows,
# $XDG_CONFIG_HOME/pep8 (falling back to ~/.config/pep8) elsewhere.
if sys.platform == 'win32':
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'), 'pep8')
# Project-level configuration files recognized in the checked tree.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

SINGLETONS = frozenset(['False', 'None', 'True'])
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'\b(None|False|True)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')

# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
124 ##############################################################################
125 # Plugins (check functions) for physical lines
126 ##############################################################################
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only. The
    second-most popular way is with tabs only. Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively. When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces. When using -tt
    these warnings become errors. These options are highly recommended!

    Okay: if a == 0:\n    a = 1\n    b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    # Every indentation character must match the file's dominant indent_char.
    for offset, char in enumerate(indent):
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"
def tabs_obsolete(physical_line):
    r"""For new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    # Report the column of the first tab in the indentation, if any.
    if '\t' in indent:
        return indent.index('\t'), "W191 indentation contains tabs"
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies on whether the line itself is blank, for
    easier filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    physical_line = physical_line.rstrip('\n')    # chr(10), newline
    physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
    physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
    stripped = physical_line.rstrip(' \t\v')
    if physical_line != stripped:
        if stripped:
            return len(stripped), "W291 trailing whitespace"
        else:
            return 0, "W293 blank line contains whitespace"
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    # Only the very last physical line of the file is checked.
    if line_number == total_lines:
        stripped_last_line = physical_line.rstrip()
        if not stripped_last_line:
            return 0, "W391 blank line at end of file"
        if stripped_last_line == physical_line:
            # No trailing newline character at all.
            return len(physical_line), "W292 no newline at end of file"
def maximum_line_length(physical_line, max_line_length, multiline):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side. The default wrapping on such devices looks
    ugly. Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    if length > max_line_length and not noqa(line):
        # Special case for long URLs in multi-line docstrings or comments,
        # but still report the error when the 72 first chars are whitespaces.
        chunks = line.split()
        if ((len(chunks) == 1 and multiline) or
            (len(chunks) == 2 and chunks[0] == '#')) and \
                len(line) - len(chunks[-1]) < max_line_length - 7:
            return
        if hasattr(line, 'decode'):   # Python 2
            # The line could contain multi-byte characters
            try:
                length = len(line.decode('utf-8'))
            except UnicodeError:
                pass
        if length > max_line_length:
            return (max_line_length, "E501 line too long "
                    "(%d > %d characters)" % (length, max_line_length))
229 ##############################################################################
230 # Plugins (check functions) for logical lines
231 ##############################################################################
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical, previous_indent_level):
    r"""Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions. Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number < 3 and not previous_logical:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif blank_lines > 2 or (indent_level and blank_lines == 2):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif logical_line.startswith(('def ', 'class ', '@')):
        if indent_level:
            # Nested definition: a single blank line (or a docstring / deeper
            # indent just before) is enough.
            if not (blank_before or previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)):
                yield 0, "E301 expected 1 blank line, found 0"
        elif blank_before != 2:
            yield 0, "E302 expected 2 blank lines, found %d" % blank_before
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    line = logical_line
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
        text = match.group()
        char = text.strip()
        found = match.start()
        if text == char + ' ':
            # assert char in '([{'
            yield found + 1, "E201 whitespace after '%s'" % char
        elif line[found - 1] != ',':
            code = ('E202' if char in '}])' else 'E203')  # if char in ',;:'
            yield found, "%s whitespace before '%s'" % (code, char)
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    for match in KEYWORD_REGEX.finditer(logical_line):
        before, after = match.groups()

        if '\t' in before:
            yield match.start(1), "E274 tab before keyword"
        elif len(before) > 1:
            yield match.start(1), "E272 multiple spaces before keyword"

        if '\t' in after:
            yield match.start(2), "E273 tab after keyword"
        elif len(after) > 1:
            yield match.start(2), "E271 multiple spaces after keyword"
def missing_whitespace(logical_line):
    r"""Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    for index in range(len(line) - 1):
        char = line[index]
        if char in ',;:' and line[index + 1] not in WHITESPACE:
            before = line[:index]
            if char == ':' and before.count('[') > before.count(']') and \
                    before.rfind('{') < before.rfind('['):
                continue  # Slice syntax, no space required
            if char == ',' and line[index + 1] == ')':
                continue  # Allow tuple with only one element: (3,)
            yield index, "E231 missing whitespace after '%s'" % char
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass
    E115: for item in items:\n# Hi\n    pass

    E113: a = 1\n    b = 2
    E116: a = 1\n    # b = 2
    """
    # Comment-only logical lines get the E114/E115/E116 variants (offset 3).
    c = 0 if logical_line else 3
    tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
    if indent_level % 4:
        yield 0, tmpl % (1 + c, "indentation is not a multiple of four")
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        yield 0, tmpl % (2 + c, "expected an indented block")
    elif not indent_expect and indent_level > previous_indent_level:
        yield 0, tmpl % (3 + c, "unexpected indentation")
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    # NOTE(review): this copy of the function is missing interior lines --
    # several names (`row`, `depth`, `parens`, `open_rows`, `hangs`,
    # `indent_chances`, `last_token_multiline`, `expand_indent`) are used
    # without a visible initialization, and some branches have no visible
    # body.  Restore the elided lines from the upstream pep8 sources before
    # running this check.
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    # A single-row logical line cannot have continuation lines.
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    valid_hangs = (4,) if indent_char != '\t' else (4, 8)
    # remember how many brackets were opened on each line
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    # for each depth, memorize the hanging indentation
    last_indent = tokens[0][2]
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    # NOTE(review): upstream guards this debug print with `if verbose >= 3:`.
    print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:
        newline = row < start[0] - first_row
        row = start[0] - first_row
        newline = not last_token_multiline and token_type not in NEWLINE

        # this is the beginning of a continuation line.
        # NOTE(review): debug print; upstream guards it with verbose level.
        print("... " + line.rstrip())

        # record the initial indent.
        rel_indent[row] = expand_indent(line) - indent_level

        # identify closing bracket
        close_bracket = (token_type == tokenize.OP and text in ']})')

        # is the indent relative to an opening bracket line?
        for open_row in reversed(open_rows[depth]):
            hang = rel_indent[row] - rel_indent[open_row]
            hanging_indent = hang in valid_hangs
        hanging_indent = (hang == hangs[depth])
        # is there any chance of visual indent?
        visual_indent = (not close_bracket and hang > 0 and
                         indent_chances.get(start[1]))

        if close_bracket and indent[depth]:
            # closing bracket for visual indent
            if start[1] != indent[depth]:
                yield (start, "E124 closing bracket does not match "
                       "visual indentation")
        elif close_bracket and not hang:
            # closing bracket matches indentation of opening bracket's line
            yield start, "E133 closing bracket is missing indentation"
        elif indent[depth] and start[1] < indent[depth]:
            if visual_indent is not True:
                # visual indent is broken
                yield (start, "E128 continuation line "
                       "under-indented for visual indent")
        elif hanging_indent or (indent_next and rel_indent[row] == 8):
            # hanging indent is verified
            if close_bracket and not hang_closing:
                yield (start, "E123 closing bracket does not match "
                       "indentation of opening bracket's line")
        elif visual_indent is True:
            # visual indent is verified
            indent[depth] = start[1]
        elif visual_indent in (text, str):
            # ignore token lined up with matching one from a previous line
            error = "E122", "missing indentation or outdented"
            error = "E127", "over-indented for visual indent"
        elif not close_bracket and hangs[depth]:
            error = "E131", "unaligned for hanging indent"
        error = "E126", "over-indented for hanging indent"
        error = "E121", "under-indented for hanging indent"
        yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            # NOTE(review): debug print; upstream guards it with verbose level.
            print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if len(open_rows) == depth:
                # NOTE(review): upstream appends a fresh row list here.
                open_rows[depth].append(row)
            # NOTE(review): debug print; upstream guards it with verbose level.
            print("bracket depth %s seen, col %s, visual min = %s" %
                  (depth, start[1], indent[depth]))
        elif text in ')]}' and depth > 0:
            # parent indents should not be more than this one
            prev_indent = indent.pop() or last_indent[1]
            for d in range(depth):
                if indent[d] > prev_indent:
                    # prune indent chances above the closed bracket's indent
                    for ind in list(indent_chances):
                        if ind >= prev_indent:
                            del indent_chances[ind]
            del open_rows[depth + 1:]
            indent_chances[indent[depth]] = True
            for idx in range(row, -1, -1):
                assert len(indent) == depth + 1
                if start[1] not in indent_chances:
                    # allow to line up tokens
                    indent_chances[start[1]] = text

    last_token_multiline = (start[0] != end[0])
    if last_token_multiline:
        rel_indent[end[0] - first_row] = rel_indent[row]

    if indent_next and expand_indent(line) == indent_level + 4:
        pos = (start[0], indent[0] + 4)
        code = "E129 visually indented line"
        code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index in range(1, len(tokens)):
        token_type, text, start, end, __ = tokens[index]
        if (token_type == tokenize.OP and
                text in '([' and
                start != prev_end and
                (prev_type == tokenize.NAME or prev_text in '}])') and
                # Syntax "class A (B):" is allowed, but avoid it
                (index < 2 or tokens[index - 2][1] != 'class') and
                # Allow "return (a.foo for a in range(5))"
                not keyword.iskeyword(prev_text)):
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type = token_type
        prev_text = text
        prev_end = end
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    for match in OPERATOR_REGEX.finditer(logical_line):
        before, after = match.groups()

        if '\t' in before:
            yield match.start(1), "E223 tab before operator"
        elif len(before) > 1:
            yield match.start(1), "E221 multiple spaces before operator"

        if '\t' in after:
            yield match.start(2), "E224 tab after operator"
        elif len(after) > 1:
            yield match.start(2), "E222 multiple spaces after operator"
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)

    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E228: msg = fmt%(errno, errmsg)
    """
    # NOTE(review): lines are missing from this copy of the function: the
    # initialization of `parens` and `need_space`, the bracket-counting
    # bodies, and several branch bodies / resets.  Restore the elided lines
    # from the upstream pep8 sources before relying on this check.
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        # skip newline/indent/comment tokens entirely
        if token_type in SKIP_COMMENTS:
        # track bracket/lambda nesting so '=' inside calls is exempt
        if text in ('(', 'lambda'):
        if start != prev_end:
            # Found a (probably) needed space
            if need_space is not True and not need_space[1]:
                yield (need_space[0],
                       "E225 missing whitespace around operator")
        elif text == '>' and prev_text in ('<', '-'):
            # Tolerate the "<>" operator, even if running Python 3
            # Deal with Python 3's annotated return value "->"
        else:
            if need_space is True or need_space[1]:
                # A needed trailing space was not found
                yield prev_end, "E225 missing whitespace around operator"
            elif prev_text != '**':
                code, optype = 'E226', 'arithmetic'
                code, optype = 'E228', 'modulo'
            elif prev_text not in ARITHMETIC_OP:
                code, optype = 'E227', 'bitwise or shift'
            yield (need_space[0], "%s missing whitespace "
                   "around %s operator" % (code, optype))
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
            elif text in WS_NEEDED_OPERATORS:
            elif text in UNARY_OPERATORS:
                # Check if the operator is being used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if (prev_text in '}])' if prev_type == tokenize.OP
                        else prev_text not in KEYWORDS):
            elif text in WS_OPTIONAL_OPERATORS:
            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
        prev_type = token_type
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    line = logical_line
    for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
        found = m.start() + 1
        if '\t' in m.group():
            yield found, "E242 tab after '%s'" % m.group()[0]
        else:
            yield found, "E241 multiple spaces after '%s'" % m.group()[0]
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    parens = 0
    no_space = False
    prev_end = None
    message = "E251 unexpected spaces around keyword / parameter equals"
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if no_space:
            # Previous token was a keyword '='; the next token must follow
            # it immediately (no space after the '=').
            no_space = False
            if start != prev_end:
                yield (prev_end, message)
        if token_type == tokenize.OP:
            if text in '([':
                parens += 1
            elif text in ')]':
                parens -= 1
            elif parens and text == '=':
                # '=' inside brackets marks a keyword/default assignment.
                no_space = True
                if start != prev_end:
                    yield (prev_end, message)
        prev_end = end
def whitespace_before_comment(logical_line, tokens):
    r"""Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement. Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Each line of a block comment starts with a # and a single space
    (unless it is indented text inside the comment).

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comment
    E261: x = x + 1 # Increment x
    E262: x = x + 1  ## Increment x
    E262: x = x + 1  #!  Increment x
    E265: #Block comment
    E266: ### Block comment
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # Anything before the '#' on the same line makes it inline.
            inline_comment = line[:start[1]].strip()
            if inline_comment:
                if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                    yield (prev_end,
                           "E261 at least two spaces before inline comment")
            symbol, sp, comment = text.partition(' ')
            bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
            if inline_comment:
                if bad_prefix or comment[:1] in WHITESPACE:
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
                # '#!' is tolerated as a shebang on the first line only.
                if bad_prefix != '#':
                    yield start, "E265 block comment should start with '# '"
                elif comment:
                    yield start, "E266 too many leading '#' for block comment"
        elif token_type != tokenize.NL:
            prev_end = end
def imports_on_separate_lines(logical_line):
    r"""Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    line = logical_line
    if line.startswith('import '):
        found = line.find(',')
        # A ';' before the comma means the comma belongs to a later statement.
        if -1 < found and ';' not in line[:found]:
            yield found, "E401 multiple imports on one line"
def module_imports_on_top_of_file(
        logical_line, indent_level, checker_state, noqa):
    r"""Imports are always put at the top of the file, just after any module
    comments and docstrings, and before module globals and constants.

    Okay: import os
    Okay: # this is a comment\nimport os
    Okay: '''this is a module docstring'''\nimport os
    Okay: r'''this is a module docstring'''\nimport os
    Okay: try:\n    import x\nexcept:\n    pass\nelse:\n    pass\nimport y
    Okay: try:\n    import x\nexcept:\n    pass\nfinally:\n    pass\nimport y
    E402: 'One string'\n"Two string"\nimport os
    E402: a=1\nfrom sys import x

    Okay: if x:\n    import os
    """
    def is_string_literal(line):
        # Strip optional string prefixes (u/b then r) before checking the
        # quote character.
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    allowed_try_keywords = ('try', 'except', 'else', 'finally')

    if indent_level:  # Allow imports in conditional statements or functions
        return
    if not logical_line:  # Allow empty lines or comments
        return
    if noqa:
        return
    line = logical_line
    if line.startswith('import ') or line.startswith('from '):
        if checker_state.get('seen_non_imports', False):
            yield 0, "E402 import not at top of file"
    elif any(line.startswith(kw) for kw in allowed_try_keywords):
        # Allow try, except, else, finally keywords intermixed with imports in
        # order to support conditional importing
        return
    elif is_string_literal(line):
        # The first literal is a docstring, allow it. Otherwise, report error.
        if checker_state.get('seen_docstring', False):
            checker_state['seen_non_imports'] = True
        else:
            checker_state['seen_docstring'] = True
    else:
        checker_state['seen_non_imports'] = True
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Always use a def statement instead of an assignment statement that
    binds a lambda expression directly to a name.

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()
    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    E704: def f(x): return 2*x
    E731: f = lambda x: 2*x
    """
    line = logical_line
    last_char = len(line) - 1
    found = line.find(':')
    while -1 < found < last_char:
        before = line[:found]
        if ((before.count('{') <= before.count('}') and   # {'a': 1} (dict)
             before.count('[') <= before.count(']') and   # [1:2] (slice)
             before.count('(') <= before.count(')'))):    # (annotation)
            lambda_kw = LAMBDA_REGEX.search(before)
            if lambda_kw:
                # Yes, but is it a lambda definition, or its default argument?
                before = line[:lambda_kw.start()].rstrip()
                if before[-1:] == '=' and isidentifier(before[:-1].strip()):
                    yield 0, ("E731 do not assign a lambda expression, use a "
                              "def")
                break
            if before.startswith('def '):
                yield 0, "E704 multiple statements on one line (def)"
            else:
                yield found, "E701 multiple statements on one line (colon)"
        found = line.find(':', found + 1)
    found = line.find(';')
    while -1 < found:
        if found < last_char:
            # when comment is not at the end
            yield found, "E702 multiple statements on one line (semicolon)"
        else:
            yield found, "E703 statement ends with a semicolon"
        found = line.find(';', found + 1)
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces.  Long lines can be
    broken over multiple lines by wrapping expressions in parentheses.  These
    should be used in preference to using a backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    """
    prev_start = prev_end = parens = 0
    for token_type, text, start, end, line in tokens:
        # A backslash seen while brackets are open is redundant.
        if start[0] != prev_start and parens and backslash:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            if line.rstrip('\r\n').endswith('\\'):
                # Remember the position of the trailing backslash.
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E711: if None == arg:
    E712: if arg == True:
    E712: if False == arg:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None was
    set to some other value.  The other value might have a type (such as a
    container) that could be false in a boolean context!
    """
    match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
    if match:
        # The singleton may appear on either side of the comparison.
        singleton = match.group(1) or match.group(3)
        same = (match.group(2) == '==')

        msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
        if singleton in ('None',):
            code = 'E711'
        else:
            code = 'E712'
            nonzero = ((singleton == 'True' and same) or
                       (singleton == 'False' and not same))
            msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
        yield match.start(2), ("%s comparison to %s should be %s" %
                               (code, singleton, msg))
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    match = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if match:
        pos = match.start(1)
        if match.group(2) == 'in':
            yield pos, "E713 test for membership should be 'not in'"
        else:
            yield pos, "E714 test for object identity should be 'is not'"
def comparison_type(logical_line):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might be a
    unicode string too! In Python 2.3, str and unicode have a common base
    class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if match:
        inst = match.group(1)
        if inst and isidentifier(inst) and inst not in SINGLETONS:
            return  # Allow comparison for types which are not obvious
        yield match.start(), "E721 do not compare types, use 'isinstance()'"
def python_3000_has_key(logical_line, noqa):
    r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    pos = logical_line.find('.has_key(')
    # -1 means the pattern is absent; the noqa flag silences the warning.
    if pos > -1 and not noqa:
        yield pos, "W601 .has_key() is deprecated, use 'in'"
def python_3000_raise_comma(logical_line):
    r"""When raising an exception, use "raise ValueError('message')".

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    # A bare re-raise ("raise exc, arg, tb") is matched by RERAISE_COMMA_REGEX
    # and is deliberately not reported here.
    if match and not RERAISE_COMMA_REGEX.match(logical_line):
        yield match.end() - 1, "W602 deprecated form of raising exception"
def python_3000_not_equal(logical_line):
    r"""New code should always use != instead of <>.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    pos = logical_line.find('<>')
    # find() returns -1 when the operator is absent.
    if pos > -1:
        yield pos, "W603 '<>' is deprecated, use '!='"
def python_3000_backticks(logical_line):
    r"""Backticks are removed in Python 3: use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    pos = logical_line.find('`')
    # find() returns -1 when no backtick is present.
    if pos > -1:
        yield pos, "W604 backticks are deprecated, use 'repr()'"
##############################################################################
# Helper functions for Python 2 / Python 3 compatibility
##############################################################################


if '' == ''.encode():
    # Python 2: implicit encoding.
    def readlines(filename):
        """Read the source code."""
        with open(filename, 'rU') as f:
            return f.readlines()
    isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3
    def readlines(filename):
        """Read the source code."""
        try:
            with open(filename, 'rb') as f:
                (coding, lines) = tokenize.detect_encoding(f.readline)
                f = TextIOWrapper(f, coding, line_buffering=True)
                return [l.decode(coding) for l in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as f:
                return f.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        """Read stdin as text, ignoring encoding errors."""
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()

noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1144 def expand_indent(line):
1145 r"""Return the amount of indentation.
1147 Tabs are expanded to the next multiple of 8.
1149 >>> expand_indent(' ')
1151 >>> expand_indent('\t')
1153 >>> expand_indent(' \t')
1155 >>> expand_indent(' \t')
1158 if '\t' not in line:
1159 return len(line) - len(line.lstrip())
1163 result = result // 8 * 8 + 8
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # String modifiers (e.g. u or r): skip past them to the opening quote.
    start = text.index(text[-1]) + 1
    end = len(text) - 1
    # Triple quotes
    if text[-3:] in ('"""', "'''"):
        start += 2
        end -= 2
    return text[:start] + 'x' * (end - start) + text[end:]
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # For each file of the diff, the entry key is the filename,
    # and the value is a set of row numbers to consider.
    rv = {}
    path = nrows = None
    for line in diff.splitlines():
        if nrows:
            # Inside a hunk: count down the rows of the new file
            # (removed lines belong only to the old file).
            if line[:1] != '-':
                nrows -= 1
            continue
        if line[:3] == '@@ ':
            hunk_match = HUNK_REGEX.match(line)
            (row, nrows) = [int(g or '1') for g in hunk_match.groups()]
            rv[path].update(range(row, row + nrows))
        elif line[:3] == '+++':
            path = line[4:].split('\t', 1)[0]
            if path[:2] == 'b/':
                path = path[2:]
            rv[path] = set()
    return dict([(os.path.join(parent, path), rows)
                 for (path, rows) in rv.items()
                 if rows and filename_match(path, patterns)])
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Return a list of absolute paths.
    """
    if not value:
        return []
    if isinstance(value, list):
        # Already normalized (e.g. passed programmatically).
        return value
    paths = []
    for path in value.split(','):
        path = path.strip()
        if '/' in path:
            path = os.path.abspath(os.path.join(parent, path))
        paths.append(path.rstrip('/'))
    return paths
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is unspecified, this always returns True.
    """
    if not patterns:
        return default
    return any(fnmatch(filename, pattern) for pattern in patterns)
def _is_eol_token(token):
    # A token ends a physical line when it is a NEWLINE/NL token, or when
    # the rest of its source line is only an escaped newline.
    return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
if COMMENT_WITH_NL:
    # Tokenizers that do not emit an NL after a whole-line comment need a
    # wider definition of "end of line" that also accepts such comments.
    def _is_eol_token(token, _eol_token=_is_eol_token):
        return _eol_token(token) or (token[0] == tokenize.COMMENT and
                                     token[1] == token[4])
1251 ##############################################################################
1252 # Framework to run all checks
1253 ##############################################################################
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}

# inspect.getargspec() was removed in Python 3.11; prefer the Python 3
# replacement and fall back for Python 2.  Both return the argument names
# as the first element of the result.
try:
    _get_argument_spec = inspect.getfullargspec
except AttributeError:      # Python 2
    _get_argument_spec = inspect.getargspec


def register_check(check, codes=None):
    """Register a new check object.

    Return *check* unchanged, so this can also be used as a decorator.
    """
    def _add_check(check, kind, codes, args):
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    if inspect.isfunction(check):
        args = _get_argument_spec(check)[0]
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                # Derive the error codes from the docstring.
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if _get_argument_spec(check.__init__)[0][:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
    return check
def init_checks_registry():
    """Register all globally visible functions.

    The first argument name is either 'physical_line' or 'logical_line'.
    """
    mod = inspect.getmodule(register_check)
    for (name, function) in inspect.getmembers(mod, inspect.isfunction):
        register_check(function)
init_checks_registry()
class Checker(object):
    """Load a Python source file, tokenize it, check coding style."""

    # NOTE(review): several lines of this class were dropped in this copy of
    # the file; the bodies below restore them from the upstream pep8 1.6
    # sources -- confirm against the project history.

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.multiline = False    # in a multiline string?
        self.hang_closing = options.hang_closing
        self.verbose = options.verbose
        self.filename = filename
        # Dictionary where a checker can store its custom state.
        self._checker_states = {}
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                (exc_type, exc) = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        if self.lines:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error

    def report_invalid_syntax(self):
        """Check if the syntax is valid."""
        (exc_type, exc) = sys.exc_info()[:2]
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                # Python 2.5 style
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)

    def readline(self):
        """Get the next line from the input buffer."""
        if self.line_number >= self.total_lines:
            return ''
        line = self.lines[self.line_number]
        self.line_number += 1
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        return line

    def run_check(self, check, argument_names):
        """Run a check plugin."""
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def init_checker_state(self, name, argument_names):
        """ Prepares a custom state for the specific checker plugin."""
        if 'checker_state' in argument_names:
            self.checker_state = self._checker_states.setdefault(name, {})

    def check_physical(self, line):
        """Run all physical checks on a raw input line."""
        self.physical_line = line
        for name, check, argument_names in self._physical_checks:
            self.init_checker_state(name, argument_names)
            result = self.run_check(check, argument_names)
            if result is not None:
                (offset, text) = result
                self.report_error(self.line_number, offset, text, check)
                if text[:4] == 'E101':
                    # Remember the indent style reported by E101.
                    self.indent_char = line[0]

    def build_tokens_line(self):
        """Build a logical line from tokens."""
        logical = []
        comments = []
        length = 0
        prev_row = prev_col = mapping = None
        for token_type, text, start, end, line in self.tokens:
            if token_type in SKIP_TOKENS:
                continue
            if not mapping:
                mapping = [(0, start)]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if prev_row:
                (start_row, start_col) = start
                if prev_row != start_row:    # different row
                    prev_text = self.lines[prev_row - 1][prev_col - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        text = ' ' + text
                elif prev_col != start_col:  # different column
                    text = line[prev_col:start_col] + text
            logical.append(text)
            length += len(text)
            mapping.append((length, end))
            (prev_row, prev_col) = end
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        return mapping

    def check_logical(self):
        """Build a line from tokens and run all logical checks on it."""
        self.report.increment_logical_line()
        mapping = self.build_tokens_line()

        if not mapping:
            return

        (start_row, start_col) = mapping[0][1]
        start_line = self.lines[start_row - 1]
        self.indent_level = expand_indent(start_line[:start_col])
        if self.blank_before < self.blank_lines:
            self.blank_before = self.blank_lines
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            self.init_checker_state(name, argument_names)
            for offset, text in self.run_check(check, argument_names) or ():
                if not isinstance(offset, tuple):
                    # A plain integer is a character offset in the logical
                    # line: map it back to a (row, col) position.
                    for token_offset, pos in mapping:
                        if offset <= token_offset:
                            break
                    offset = (pos[0], pos[1] + offset - token_offset)
                self.report_error(offset[0], offset[1], text, check)
        if self.logical_line:
            self.previous_indent_level = self.indent_level
            self.previous_logical = self.logical_line
        self.blank_lines = 0
        self.tokens = []

    def check_ast(self):
        """Build the file's AST and run all AST checks."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, __ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                if not self.lines or not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize the file, run physical line checks and yield tokens."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            for token in tokengen:
                if token[2][0] > self.total_lines:
                    return
                self.maybe_check_physical(token)
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def maybe_check_physical(self, token):
        """If appropriate (based on token), check current physical line(s)."""
        # Called after every token, but act only on end of line.
        if _is_eol_token(token):
            # Obviously, a newline token ends a single physical line.
            self.check_physical(token[4])
        elif token[0] == tokenize.STRING and '\n' in token[1]:
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in the
            # string *except* for the last one: its newline is outside of
            # the multiline string, so we consider it a regular physical
            # line, and will check it like any other physical line.
            #
            # Subtleties:
            # - we don't *completely* ignore the last line; if it contains
            #   the magical "# noqa" comment, we disable all physical
            #   checks for the entire multiline string
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            if noqa(token[4]):
                return
            self.multiline = True
            self.line_number = token[2][0]
            for line in token[1].split('\n')[:-1]:
                self.check_physical(line + '\n')
                self.line_number += 1
            self.multiline = False

    def check_all(self, expected=None, line_offset=0):
        """Run all checks on the input file."""
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        self.total_lines = len(self.lines)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = self.previous_indent_level = 0
        self.previous_logical = ''
        self.tokens = []
        self.blank_lines = self.blank_before = 0
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            if token_type == tokenize.OP:
                # Track bracket depth: logical lines only end outside ().
                if text in '([{':
                    parens += 1
                elif text in ')]}':
                    parens -= 1
            elif not parens:
                if token_type in NEWLINE:
                    if token_type == tokenize.NEWLINE:
                        self.check_logical()
                        self.blank_before = 0
                    elif len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        del self.tokens[0]
                    else:
                        self.check_logical()
                elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
                    if len(self.tokens) == 1:
                        # The comment also ends a physical line
                        token = list(token)
                        token[1] = text.rstrip('\r\n')
                        token[3] = (token[2][0], token[2][1] + len(token[1]))
                        self.tokens = [tuple(token)]
                        self.check_logical()
        if self.tokens:
            # Flush the trailing (incomplete) logical line, if any.
            self.check_physical(self.lines[-1])
            self.check_logical()
        return self.report.get_file_results()
class BaseReport(object):
    """Collect the results of the checks."""

    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Results
        self.elapsed = 0
        self.total_errors = 0
        self.counters = dict.fromkeys(self._benchmark_keys, 0)
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = text[:4]
        if self._ignore_code(code):
            return
        if code in self.counters:
            self.counters[code] += 1
        else:
            self.counters[code] = 1
            self.messages[code] = text[5:]
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        return sum([self.counters[key]
                    for key in self.messages if key.startswith(prefix)])

    def get_statistics(self, prefix=''):
        """Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        return ['%-7s %s %s' % (self.counters[key], key, self.messages[key])
                for key in sorted(self.messages) if key.startswith(prefix)]

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for line in self.get_statistics(prefix):
            print(line)

    def print_benchmark(self):
        """Print benchmark numbers."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if self.elapsed:
            for key in self._benchmark_keys:
                print('%-7d %s per second (%d total)' %
                      (self.counters[key] / self.elapsed, key,
                       self.counters[key]))
class FileReport(BaseReport):
    """Report that only prints the name of each file containing errors."""
    print_filename = True
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # Fall back to the user-supplied format string when the name is not
        # one of the predefined formats.
        self._fmt = REPORT_FORMAT.get(options.format.lower(),
                                      options.format)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        if code and (self.counters[code] == 1 or self._repeat):
            self._deferred_print.append(
                (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print the result and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + line_number, 'col': offset + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                if line_number > len(self.lines):
                    line = ''
                else:
                    line = self.lines[line_number - 1]
                print(line.rstrip())
                # Caret marker under the offending column.
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print('    ' + doc.strip())
        return self.file_errors
class DiffReport(StandardReport):
    """Collect and print the results for the changed lines only."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        # Only report errors on lines touched by the diff.
        if line_number not in self._selected[self.filename]:
            return
        return super(DiffReport, self).error(line_number, offset, text, check)
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    # NOTE(review): some lines of this class were dropped in this copy of
    # the file; the bodies below restore them from the upstream pep8 1.6
    # sources -- confirm against the project history.

    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        arglist = None if parse_argv else options_dict.get('paths', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser)
        if options_dict:
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            options.reporter = BaseReport if options.quiet else StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')

        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    # Prune excluded subdirectories from the walk.
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Check if the file should be excluded.

        Check if 'options.exclude' contains a pattern that matches filename.
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        filename = os.path.abspath(filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """Get all the checks for this category.

        Find all globally visible functions where the first argument name
        starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
def get_parser(prog='pep8', version=__version__):
    """Create the parser for the program."""
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    # Options that may also be set from a configuration file.
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('-r', '--repeat', default=True, action='store_true',
                      help="(obsolete) show all occurrences of the same error")
    parser.add_option('--first', action='store_false', dest='repeat',
                      help="show first occurrence of each error")
    parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
                      help="exclude files or directories which match these "
                           "comma separated patterns (default: %default)")
    parser.add_option('--filename', metavar='patterns', default='*.py',
                      help="when parsing directories, only check filenames "
                           "matching these comma separated patterns "
                           "(default: %default)")
    parser.add_option('--select', metavar='errors', default='',
                      help="select errors and warnings (e.g. E,W6)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error "
                           "(implies --first)")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--count', action='store_true',
                      help="print total number of errors and warnings "
                           "to standard error and set exit code to 1 if "
                           "total is not null")
    parser.add_option('--max-line-length', type='int', metavar='n',
                      default=MAX_LINE_LENGTH,
                      help="set maximum allowed line length "
                           "(default: %default)")
    parser.add_option('--hang-closing', action='store_true',
                      help="hang closing bracket instead of matching "
                           "indentation of opening bracket's line")
    parser.add_option('--format', metavar='format', default='default',
                      help="set the error format [default|pylint|<custom>]")
    parser.add_option('--diff', action='store_true',
                      help="report only lines changed according to the "
                           "unified diff received on STDIN")
    group = parser.add_option_group("Testing Options")
    if os.path.exists(TESTSUITE_PATH):
        group.add_option('--testsuite', metavar='dir',
                         help="run regression tests from dir")
        group.add_option('--doctest', action='store_true',
                         help="run doctest on myself")
    group.add_option('--benchmark', action='store_true',
                     help="measure processing speed")
    return parser
def read_config(options, args, arglist, parser):
    """Read both user configuration and local configuration."""
    config = RawConfigParser()

    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    local_dir = os.curdir
    # Walk up from the common prefix of the arguments looking for a project
    # configuration file (tox.ini / setup.cfg).
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        (parent, tail) = os.path.split(parent)

    pep8_section = parser.prog
    if config.has_section(pep8_section):
        option_list = dict([(o.dest, o.type or o.action)
                            for o in parser.option_list])

        # First, read the default values
        (new_options, __) = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pep8_section):
            if opt.replace('_', '-') not in parser.config_options:
                print(" unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print(" %s = %s" % (opt, config.get(pep8_section, opt)))
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pep8_section, opt)
            elif opt_type == 'string':
                value = config.get(pep8_section, opt)
                if normalized_opt == 'exclude':
                    value = normalize_paths(value, local_dir)
            else:
                assert opt_type in ('store_true', 'store_false')
                value = config.getboolean(pep8_section, opt)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        (options, __) = parser.parse_args(arglist, values=new_options)
    options.doctest = options.testsuite = False
    return options
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Process options passed either via arglist or via command line args."""
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        if config_file is True:
            config_file = DEFAULT_CONFIG
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location (default: %default)")
    # Don't read the command line if the module is used as a library.
    if not arglist and not parse_argv:
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    (options, args) = parser.parse_args(arglist)
    options.reporter = None

    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                # Default to checking the current directory.
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
    options.reporter = parse_argv and options.quiet == 1 and FileReport

    options.filename = options.filename and options.filename.split(',')
    options.exclude = normalize_paths(options.exclude)
    options.select = options.select and options.select.split(',')
    options.ignore = options.ignore and options.ignore.split(',')

    if options.diff:
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
def _main():
    """Parse options and run checks on Python source."""
    import signal

    # Handle "Broken pipe" gracefully
    try:
        signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
    except AttributeError:
        pass    # not supported on Windows

    pep8style = StyleGuide(parse_argv=True, config_file=True)
    options = pep8style.options
    if options.doctest or options.testsuite:
        from testsuite.support import run_tests
        report = run_tests(pep8style)
    else:
        report = pep8style.check_files()
    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()
    if options.testsuite and not options.quiet:
        report.print_results()
    if report.total_errors:
        if options.count:
            sys.stderr.write(str(report.total_errors) + '\n')
        sys.exit(1)
2035 if __name__ == '__main__':