# pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
+# Copyright (C) 2014 Ian Lee <ianlee1521@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
"""
from __future__ import with_statement
-__version__ = '1.5.3'
-
import os
import sys
import re
except ImportError:
from ConfigParser import RawConfigParser
-DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
-DEFAULT_IGNORE = 'E123,E226,E24'
-if sys.platform == 'win32':
- DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
-else:
- DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
- os.path.expanduser('~/.config'), 'pep8')
+__version__ = '1.6.0a0'
+
+DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
+DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704'
+try:
+ if sys.platform == 'win32':
+ DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
+ else:
+ DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
+ os.path.expanduser('~/.config'), 'pep8')
+except ImportError:
+ DEFAULT_CONFIG = None
+
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
'**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
'%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
-SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
- tokenize.INDENT, tokenize.DEDENT])
+NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
+SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
+# ERRORTOKEN is triggered by backticks in Python 3
+SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']
INDENT_REGEX = re.compile(r'([ \t]*)')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
-COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
-COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s')
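+# A None/False/True literal must appear on at least one side of the
+# comparison; the (?(1)...) conditional group enforces this.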
+COMPARE_SINGLETON_REGEX = re.compile(r'\b(None|False|True)?\s*([=!]=)'
+ r'\s*(?(1)|(None|False|True))\b')
+COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
return 0, "W293 blank line contains whitespace"
-def trailing_blank_lines(physical_line, lines, line_number):
+def trailing_blank_lines(physical_line, lines, line_number, total_lines):
r"""Trailing blank lines are superfluous.
Okay: spam(1)
W391: spam(1)\n
- """
- if not physical_line.rstrip() and line_number == len(lines):
- return 0, "W391 blank line at end of file"
-
-def missing_newline(physical_line):
- r"""The last line should have a newline.
-
- Reports warning W292.
+ However, the last line should end with a newline (warning W292).
"""
- if physical_line.rstrip() == physical_line:
- return len(physical_line), "W292 no newline at end of file"
+ if line_number == total_lines:
+ stripped_last_line = physical_line.rstrip()
+ if not stripped_last_line:
+ return 0, "W391 blank line at end of file"
+ if stripped_last_line == physical_line:
+ return len(physical_line), "W292 no newline at end of file"
def maximum_line_length(physical_line, max_line_length, multiline):
Okay: a = 1
Okay: if a == 0:\n a = 1
E111: a = 1
+ E114: # a = 1
Okay: for item in items:\n pass
E112: for item in items:\npass
+ E115: for item in items:\n# Hi\n pass
Okay: a = 1\nb = 2
E113: a = 1\n b = 2
+ E116: a = 1\n # b = 2
"""
- if indent_char == ' ' and indent_level % 4:
- yield 0, "E111 indentation is not a multiple of four"
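+ # Comment-only lines (empty logical line) report E114/E115/E116 instead.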
+ c = 0 if logical_line else 3
+ tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
+ if indent_level % 4:
+ yield 0, tmpl % (1 + c, "indentation is not a multiple of four")
indent_expect = previous_logical.endswith(':')
if indent_expect and indent_level <= previous_indent_level:
- yield 0, "E112 expected an indented block"
- if indent_level > previous_indent_level and not indent_expect:
- yield 0, "E113 unexpected indentation"
+ yield 0, tmpl % (2 + c, "expected an indented block")
+ elif not indent_expect and indent_level > previous_indent_level:
+ yield 0, tmpl % (3 + c, "unexpected indentation")
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
newline = row < start[0] - first_row
if newline:
row = start[0] - first_row
- newline = (not last_token_multiline and
- token_type not in (tokenize.NL, tokenize.NEWLINE))
+ newline = not last_token_multiline and token_type not in NEWLINE
if newline:
# this is the beginning of a continuation line.
prev_type = tokenize.OP
prev_text = prev_end = None
for token_type, text, start, end, line in tokens:
- if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
- # ERRORTOKEN is triggered by backticks in Python 3
+ if token_type in SKIP_COMMENTS:
continue
if text in ('(', 'lambda'):
parens += 1
if need_space is True or need_space[1]:
# A needed trailing space was not found
yield prev_end, "E225 missing whitespace around operator"
- else:
+ elif prev_text != '**':
code, optype = 'E226', 'arithmetic'
if prev_text == '%':
code, optype = 'E228', 'modulo'
# Check if the operator is being used as a binary operator
# Allow unary operators: -123, -x, +1.
# Allow argument unpacking: foo(*args, **kwargs).
- if prev_type == tokenize.OP:
- binary_usage = (prev_text in '}])')
- elif prev_type == tokenize.NAME:
- binary_usage = (prev_text not in KEYWORDS)
- else:
- binary_usage = (prev_type not in SKIP_TOKENS)
-
- if binary_usage:
+ if (prev_text in '}])' if prev_type == tokenize.OP
+ else prev_text not in KEYWORDS):
need_space = None
elif text in WS_OPTIONAL_OPERATORS:
need_space = None
Okay: boolean(a != b)
Okay: boolean(a <= b)
Okay: boolean(a >= b)
+ Okay: def foo(arg: int = 42):
E251: def complex(real, imag = 0.0):
E251: return magic(r = real, i = imag)
parens = 0
no_space = False
prev_end = None
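+ # An annotated argument may use spaces around "=" (see the
+ # "def foo(arg: int = 42)" example above), so track that case.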
+ annotated_func_arg = False
+ in_def = logical_line.startswith('def')
message = "E251 unexpected spaces around keyword / parameter equals"
for token_type, text, start, end, line in tokens:
if token_type == tokenize.NL:
no_space = False
if start != prev_end:
yield (prev_end, message)
- elif token_type == tokenize.OP:
+ if token_type == tokenize.OP:
if text == '(':
parens += 1
elif text == ')':
parens -= 1
- elif parens and text == '=':
+ elif in_def and text == ':' and parens == 1:
+ annotated_func_arg = True
+ elif parens and text == ',' and parens == 1:
+ annotated_func_arg = False
+ elif parens and text == '=' and not annotated_func_arg:
no_space = True
if start != prev_end:
yield (prev_end, message)
+ if not parens:
+ annotated_func_arg = False
+
prev_end = end
E262: x = x + 1 #Increment x
E262: x = x + 1 # Increment x
E265: #Block comment
+ E266: ### Block comment
"""
prev_end = (0, 0)
for token_type, text, start, end, line in tokens:
yield (prev_end,
"E261 at least two spaces before inline comment")
symbol, sp, comment = text.partition(' ')
- bad_prefix = symbol not in ('#', '#:')
+ bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
if inline_comment:
- if bad_prefix or comment[:1].isspace():
+ if bad_prefix or comment[:1] in WHITESPACE:
yield start, "E262 inline comment should start with '# '"
- elif bad_prefix:
- if text.rstrip('#') and (start[0] > 1 or symbol[1] != '!'):
+ elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
+ if bad_prefix != '#':
yield start, "E265 block comment should start with '# '"
+ elif comment:
+ yield start, "E266 too many leading '#' for block comment"
elif token_type != tokenize.NL:
prev_end = end
yield found, "E401 multiple imports on one line"
+def module_imports_on_top_of_file(
+ logical_line, indent_level, checker_state, noqa):
+ r"""Imports are always put at the top of the file, just after any module
+ comments and docstrings, and before module globals and constants.
+
+ Okay: import os
+ Okay: # this is a comment\nimport os
+ Okay: '''this is a module docstring'''\nimport os
+ Okay: r'''this is a module docstring'''\nimport os
+ Okay: try:\n import x\nexcept:\n pass\nelse:\n pass\nimport y
+ Okay: try:\n import x\nexcept:\n pass\nfinally:\n pass\nimport y
+ E402: a=1\nimport os
+ E402: 'One string'\n"Two string"\nimport os
+ E402: a=1\nfrom sys import x
+
+ Okay: if x:\n import os
+ """
+ def is_string_literal(line):
+ if line[0] in 'uUbB':
+ line = line[1:]
+ if line and line[0] in 'rR':
+ line = line[1:]
+ return line and (line[0] == '"' or line[0] == "'")
+
+ allowed_try_keywords = ('try', 'except', 'else', 'finally')
+
+ if indent_level: # Allow imports in conditional statements or functions
+ return
+ if not logical_line: # Allow empty lines or comments
+ return
+ if noqa:
+ return
+ line = logical_line
+ if line.startswith('import ') or line.startswith('from '):
+ if checker_state.get('seen_non_imports', False):
+ yield 0, "E402 module level import not at top of file"
+ elif any(line.startswith(kw) for kw in allowed_try_keywords):
+ # Allow try, except, else, finally keywords intermixed with imports in
+ # order to support conditional importing
+ return
+ elif is_string_literal(line):
+ # The first literal is a docstring, allow it. Otherwise, report error.
+ if checker_state.get('seen_docstring', False):
+ checker_state['seen_non_imports'] = True
+ else:
+ checker_state['seen_docstring'] = True
+ else:
+ checker_state['seen_non_imports'] = True
+
+
def compound_statements(logical_line):
r"""Compound statements (on the same line) are generally discouraged.
on the same line, never do this for multi-clause statements.
Also avoid folding such long lines!
+ Always use a def statement instead of an assignment statement that
+ binds a lambda expression directly to a name.
+
Okay: if foo == 'blah':\n do_blah_thing()
Okay: do_one()
Okay: do_two()
E701: try: something()
E701: finally: cleanup()
E701: if foo == 'blah': one(); two(); three()
-
E702: do_one(); do_two(); do_three()
E703: do_four(); # useless semicolon
+ E704: def f(x): return 2*x
+ E731: f = lambda x: 2*x
"""
line = logical_line
last_char = len(line) - 1
found = line.find(':')
while -1 < found < last_char:
before = line[:found]
- if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
- before.count('[') <= before.count(']') and # [1:2] (slice)
- before.count('(') <= before.count(')') and # (Python 3 annotation)
- not LAMBDA_REGEX.search(before)): # lambda x: x
- yield found, "E701 multiple statements on one line (colon)"
+ if ((before.count('{') <= before.count('}') and # {'a': 1} (dict)
+ before.count('[') <= before.count(']') and # [1:2] (slice)
+ before.count('(') <= before.count(')'))): # (annotation)
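+ # Distinguish lambda assignment (E731) and a one-line "def" (E704)
+ # from a generic compound statement (E701).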
+ lambda_kw = LAMBDA_REGEX.search(before)
+ if lambda_kw:
+ before = line[:lambda_kw.start()].rstrip()
+ if before[-1:] == '=' and isidentifier(before[:-1].strip()):
+ yield 0, ("E731 do not assign a lambda expression, use a "
+ "def")
+ break
+ if before.startswith('def '):
+ yield 0, "E704 multiple statements on one line (def)"
+ else:
+ yield found, "E701 multiple statements on one line (colon)"
found = line.find(':', found + 1)
found = line.find(';')
while -1 < found:
Okay: if arg is not None:
E711: if arg != None:
+ E711: if None == arg:
E712: if arg == True:
+ E712: if False == arg:
Also, beware of writing if x when you really mean if x is not None --
e.g. when testing whether a variable or argument that defaults to None was
"""
match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
if match:
- same = (match.group(1) == '==')
- singleton = match.group(2)
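+ # The singleton literal can now be on either side of the comparison.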
+ singleton = match.group(1) or match.group(3)
+ same = (match.group(2) == '==')
+
msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
if singleton in ('None',):
code = 'E711'
nonzero = ((singleton == 'True' and same) or
(singleton == 'False' and not same))
msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
- yield match.start(1), ("%s comparison to %s should be %s" %
+ yield match.start(2), ("%s comparison to %s should be %s" %
(code, singleton, msg))
# Python 2: implicit encoding.
def readlines(filename):
"""Read the source code."""
- with open(filename) as f:
+ with open(filename, 'rU') as f:
return f.readlines()
- isidentifier = re.compile(r'[a-zA-Z_]\w*').match
+ isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
stdin_get_value = sys.stdin.read
else:
# Python 3
Return a list of absolute paths.
"""
- if not value or isinstance(value, list):
+ if not value:
+ return []
+ if isinstance(value, list):
return value
paths = []
for path in value.split(','):
+ path = path.strip()
if '/' in path:
path = os.path.abspath(os.path.join(parent, path))
paths.append(path.rstrip('/'))
return any(fnmatch(filename, pattern) for pattern in patterns)
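+# A token ends a physical line if it is a NEWLINE/NL token, or if only a
+# backslash continuation follows it on that line.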
+def _is_eol_token(token):
+ return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
if COMMENT_WITH_NL:
- def _is_eol_token(token):
- return (token[0] in (tokenize.NEWLINE, tokenize.NL) or
- (token[0] == tokenize.COMMENT and token[1] == token[4]))
-else:
- def _is_eol_token(token):
- return token[0] in (tokenize.NEWLINE, tokenize.NL)
-
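+ # When the tokenizer includes the newline in COMMENT tokens, a comment
+ # spanning the whole physical line also ends that line.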
+ def _is_eol_token(token, _eol_token=_is_eol_token):
+ return _eol_token(token) or (token[0] == tokenize.COMMENT and
+ token[1] == token[4])
##############################################################################
# Framework to run all checks
self.hang_closing = options.hang_closing
self.verbose = options.verbose
self.filename = filename
+ # Dictionary where a checker can store its custom state.
+ self._checker_states = {}
if filename is None:
self.filename = 'stdin'
self.lines = lines or []
def readline(self):
"""Get the next line from the input buffer."""
- self.line_number += 1
- if self.line_number > len(self.lines):
+ if self.line_number >= self.total_lines:
return ''
- line = self.lines[self.line_number - 1]
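+ # Increment after the fetch: line_number then equals the 1-based
+ # number of the line just returned.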
+ line = self.lines[self.line_number]
+ self.line_number += 1
if self.indent_char is None and line[:1] in WHITESPACE:
self.indent_char = line[0]
return line
arguments.append(getattr(self, name))
return check(*arguments)
+ def init_checker_state(self, name, argument_names):
+ """ Prepares a custom state for the specific checker plugin."""
+ if 'checker_state' in argument_names:
+ self.checker_state = self._checker_states.setdefault(name, {})
+
def check_physical(self, line):
"""Run all physical checks on a raw input line."""
self.physical_line = line
for name, check, argument_names in self._physical_checks:
+ self.init_checker_state(name, argument_names)
result = self.run_check(check, argument_names)
if result is not None:
(offset, text) = result
def build_tokens_line(self):
"""Build a logical line from tokens."""
- mapping = []
logical = []
comments = []
length = 0
- previous = None
- for token in self.tokens:
- (token_type, text) = token[0:2]
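+ # "mapping" pairs offsets within the logical line with the (row, col)
+ # position of the corresponding token in the source.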
+ prev_row = prev_col = mapping = None
+ for token_type, text, start, end, line in self.tokens:
+ if token_type in SKIP_TOKENS:
+ continue
+ if not mapping:
+ mapping = [(0, start)]
if token_type == tokenize.COMMENT:
comments.append(text)
continue
- if token_type in SKIP_TOKENS:
- continue
if token_type == tokenize.STRING:
text = mute_string(text)
- if previous:
- (end_row, end) = previous[3]
- (start_row, start) = token[2]
- if end_row != start_row: # different row
- prev_text = self.lines[end_row - 1][end - 1]
+ if prev_row:
+ (start_row, start_col) = start
+ if prev_row != start_row: # different row
+ prev_text = self.lines[prev_row - 1][prev_col - 1]
if prev_text == ',' or (prev_text not in '{[('
and text not in '}])'):
- logical.append(' ')
- length += 1
- elif end != start: # different column
- fill = self.lines[end_row - 1][end:start]
- logical.append(fill)
- length += len(fill)
- length += len(text)
- mapping.append((length, token))
+ text = ' ' + text
+ elif prev_col != start_col: # different column
+ text = line[prev_col:start_col] + text
logical.append(text)
- previous = token
+ length += len(text)
+ mapping.append((length, end))
+ (prev_row, prev_col) = end
self.logical_line = ''.join(logical)
self.noqa = comments and noqa(''.join(comments))
- return mapping or [(len(self.tokens[0][1]), self.tokens[0])]
+ return mapping
def check_logical(self):
"""Build a line from tokens and run all logical checks on it."""
self.report.increment_logical_line()
mapping = self.build_tokens_line()
- (start_row, start_col) = mapping[0][1][2]
+
+ if not mapping:
+ return
+
+ (start_row, start_col) = mapping[0][1]
start_line = self.lines[start_row - 1]
self.indent_level = expand_indent(start_line[:start_col])
if self.blank_before < self.blank_lines:
for name, check, argument_names in self._logical_checks:
if self.verbose >= 4:
print(' ' + name)
- for result in self.run_check(check, argument_names) or ():
- (offset, text) = result
- if isinstance(offset, tuple):
- (li_number, li_offset) = offset
- else:
- for (token_offset, token) in mapping:
+ self.init_checker_state(name, argument_names)
+ for offset, text in self.run_check(check, argument_names) or ():
+ if not isinstance(offset, tuple):
+ for token_offset, pos in mapping:
if offset <= token_offset:
break
- li_number = token[3][0]
- li_offset = (token[3][1] + offset - token_offset)
- self.report_error(li_number, li_offset, text, check)
+ offset = (pos[0], pos[1] + offset - token_offset)
+ self.report_error(offset[0], offset[1], text, check)
if self.logical_line:
self.previous_indent_level = self.indent_level
self.previous_logical = self.logical_line
tokengen = tokenize.generate_tokens(self.readline)
try:
for token in tokengen:
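+ # Stop once the tokenizer reports a position past the last input line.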
+ if token[2][0] > self.total_lines:
+ return
self.maybe_check_physical(token)
yield token
except (SyntaxError, tokenize.TokenError):
def check_all(self, expected=None, line_offset=0):
"""Run all checks on the input file."""
self.report.init_file(self.filename, self.lines, expected, line_offset)
+ self.total_lines = len(self.lines)
if self._ast_checks:
self.check_ast()
self.line_number = 0
elif text in '}])':
parens -= 1
elif not parens:
- if token_type == tokenize.NEWLINE:
- self.check_logical()
- self.blank_before = 0
- elif token_type == tokenize.NL:
- if len(self.tokens) == 1:
+ if token_type in NEWLINE:
+ if token_type == tokenize.NEWLINE:
+ self.check_logical()
+ self.blank_before = 0
+ elif len(self.tokens) == 1:
# The physical line contains only this token.
self.blank_lines += 1
del self.tokens[0]
token[3] = (token[2][0], token[2][1] + len(token[1]))
self.tokens = [tuple(token)]
self.check_logical()
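+ # Check any leftover tokens, e.g. when the last line lacks a trailing
+ # newline.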
+ if self.tokens:
+ self.check_physical(self.lines[-1])
+ self.check_logical()
return self.report.get_file_results()
parser.add_option('--select', metavar='errors', default='',
help="select errors and warnings (e.g. E,W6)")
parser.add_option('--ignore', metavar='errors', default='',
- help="skip errors and warnings (e.g. E4,W)")
+ help="skip errors and warnings (e.g. E4,W) "
+ "(default: %s)" % DEFAULT_IGNORE)
parser.add_option('--show-source', action='store_true',
help="show source code for each error")
parser.add_option('--show-pep8', action='store_true',
# Second, parse the configuration
for opt in config.options(pep8_section):
+ if opt.replace('_', '-') not in parser.config_options:
+ print(" unknown option '%s' ignored" % opt)
+ continue
if options.verbose > 1:
print(" %s = %s" % (opt, config.get(pep8_section, opt)))
- if opt.replace('_', '-') not in parser.config_options:
- print("Unknown option: '%s'\n not in [%s]" %
- (opt, ' '.join(parser.config_options)))
- sys.exit(1)
normalized_opt = opt.replace('-', '_')
opt_type = option_list[normalized_opt]
if opt_type in ('int', 'count'):
def _main():
"""Parse options and run checks on Python source."""
+ import signal
+
+ # Handle "Broken pipe" gracefully
+ try:
+ signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
+ except AttributeError:
+ pass # not supported on Windows
+
pep8style = StyleGuide(parse_argv=True, config_file=True)
options = pep8style.options
if options.doctest or options.testsuite: