git.samba.org - third_party/pep8/blob - pep8.py

   1 #!/usr/bin/env python
   2 # pep8.py - Check Python source code formatting, according to PEP 8
   3 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
   4 # Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
   5 # Copyright (C) 2014 Ian Lee <ianlee1521@gmail.com>
   6 #
   7 # Permission is hereby granted, free of charge, to any person
   8 # obtaining a copy of this software and associated documentation files
   9 # (the "Software"), to deal in the Software without restriction,
  10 # including without limitation the rights to use, copy, modify, merge,
  11 # publish, distribute, sublicense, and/or sell copies of the Software,
  12 # and to permit persons to whom the Software is furnished to do so,
  13 # subject to the following conditions:
  14 #
  15 # The above copyright notice and this permission notice shall be
  16 # included in all copies or substantial portions of the Software.
  17 #
  18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  22 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  23 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  25 # SOFTWARE.
  26
  27 r"""
  28 Check Python source code formatting, according to PEP 8.
  29
  30 For usage and a list of options, try this:
  31 $ python pep8.py -h
  32
  33 This program and its regression test suite live here:
  34 http://github.com/jcrocholl/pep8
  35
  36 Groups of errors and warnings:
  37 E errors
  38 W warnings
  39 100 indentation
  40 200 whitespace
  41 300 blank lines
  42 400 imports
  43 500 line length
  44 600 deprecation
  45 700 statements
  46 900 syntax error
  47 """
  48 from __future__ import with_statement
  49
  50 import os
  51 import sys
  52 import re
  53 import time
  54 import inspect
  55 import keyword
  56 import tokenize
  57 from optparse import OptionParser
  58 from fnmatch import fnmatch
  59 try:
  60     from configparser import RawConfigParser
  61     from io import TextIOWrapper
  62 except ImportError:
  63     from ConfigParser import RawConfigParser
  64
  65 __version__ = '1.6.0a0'
  66
  67 DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
  68 DEFAULT_IGNORE = 'E123,E226,E24,E704'
  69 try:
  70     if sys.platform == 'win32':
  71         DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
  72     else:
  73         DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
  74                                       os.path.expanduser('~/.config'), 'pep8')
  75 except ImportError:
  76     DEFAULT_CONFIG = None
  77
  78 PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
  79 TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
  80 MAX_LINE_LENGTH = 79
  81 REPORT_FORMAT = {
  82     'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
  83     'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
  84 }
  85
  86 PyCF_ONLY_AST = 1024
  87 SINGLETONS = frozenset(['False', 'None', 'True'])
  88 KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
  89 UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
  90 ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
  91 WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
  92 WS_NEEDED_OPERATORS = frozenset([
  93     '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
  94     '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
  95 WHITESPACE = frozenset(' \t')
  96 NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
  97 SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
  98 # ERRORTOKEN is triggered by backticks in Python 3
  99 SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
 100 BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']
 101
 102 INDENT_REGEX = re.compile(r'([ \t]*)')
 103 RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
 104 RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
 105 ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
 106 DOCSTRING_REGEX = re.compile(r'u?r?["\']')
 107 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
 108 WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
 109 COMPARE_SINGLETON_REGEX = re.compile(r'\b(None|False|True)?\s*([=!]=)'
 110                                      r'\s*(?(1)|(None|False|True))\b')
 111 COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s')
 112 COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
 113                                 r'|\s*\(\s*([^)]*[^ )])\s*\))')
 114 KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
 115 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
 116 LAMBDA_REGEX = re.compile(r'\blambda\b')
 117 HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
 118
# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
# The probe runs once at import time: it tokenizes the one-line source '#\n'
# and compares the text of the first token produced with '#\n', i.e. it
# records whether the newline was folded into the comment token itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
 122
 123
 124 ##############################################################################
 125 # Plugins (check functions) for physical lines
 126 ##############################################################################
 127
 128
 129 def tabs_or_spaces(physical_line, indent_char):
 130     r"""Never mix tabs and spaces.
 131
 132     The most popular way of indenting Python is with spaces only.  The
 133     second-most popular way is with tabs only.  Code indented with a mixture
 134     of tabs and spaces should be converted to using spaces exclusively.  When
 135     invoking the Python command line interpreter with the -t option, it issues
 136     warnings about code that illegally mixes tabs and spaces.  When using -tt
 137     these warnings become errors.  These options are highly recommended!
 138
 139     Okay: if a == 0:\n        a = 1\n        b = 1
 140     E101: if a == 0:\n        a = 1\n\tb = 1
 141     """
 142     indent = INDENT_REGEX.match(physical_line).group(1)
 143     for offset, char in enumerate(indent):
 144         if char != indent_char:
 145             return offset, "E101 indentation contains mixed spaces and tabs"
 146
 147
 148 def tabs_obsolete(physical_line):
 149     r"""For new projects, spaces-only are strongly recommended over tabs.
 150
 151     Okay: if True:\n    return
 152     W191: if True:\n\treturn
 153     """
 154     indent = INDENT_REGEX.match(physical_line).group(1)
 155     if '\t' in indent:
 156         return indent.index('\t'), "W191 indentation contains tabs"
 157
 158
 159 def trailing_whitespace(physical_line):
 160     r"""Trailing whitespace is superfluous.
 161
 162     The warning returned varies on whether the line itself is blank, for easier
 163     filtering for those who want to indent their blank lines.
 164
 165     Okay: spam(1)\n#
 166     W291: spam(1) \n#
 167     W293: class Foo(object):\n    \n    bang = 12
 168     """
 169     physical_line = physical_line.rstrip('\n')    # chr(10), newline
 170     physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
 171     physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
 172     stripped = physical_line.rstrip(' \t\v')
 173     if physical_line != stripped:
 174         if stripped:
 175             return len(stripped), "W291 trailing whitespace"
 176         else:
 177             return 0, "W293 blank line contains whitespace"
 178
 179
 180 def trailing_blank_lines(physical_line, lines, line_number, total_lines):
 181     r"""Trailing blank lines are superfluous.
 182
 183     Okay: spam(1)
 184     W391: spam(1)\n
 185
 186     However the last line should end with a new line (warning W292).
 187     """
 188     if line_number == total_lines:
 189         stripped_last_line = physical_line.rstrip()
 190         if not stripped_last_line:
 191             return 0, "W391 blank line at end of file"
 192         if stripped_last_line == physical_line:
 193             return len(physical_line), "W292 no newline at end of file"
 194
 195
 196 def maximum_line_length(physical_line, max_line_length, multiline):
 197     r"""Limit all lines to a maximum of 79 characters.
 198
 199     There are still many devices around that are limited to 80 character
 200     lines; plus, limiting windows to 80 characters makes it possible to have
 201     several windows side-by-side.  The default wrapping on such devices looks
 202     ugly.  Therefore, please limit all lines to a maximum of 79 characters.
 203     For flowing long blocks of text (docstrings or comments), limiting the
 204     length to 72 characters is recommended.
 205
 206     Reports error E501.
 207     """
 208     line = physical_line.rstrip()
 209     length = len(line)
 210     if length > max_line_length and not noqa(line):
 211         # Special case for long URLs in multi-line docstrings or comments,
 212         # but still report the error when the 72 first chars are whitespaces.
 213         chunks = line.split()
 214         if ((len(chunks) == 1 and multiline) or
 215             (len(chunks) == 2 and chunks[0] == '#')) and \
 216                 len(line) - len(chunks[-1]) < max_line_length - 7:
 217             return
 218         if hasattr(line, 'decode'):   # Python 2
 219             # The line could contain multi-byte characters
 220             try:
 221                 length = len(line.decode('utf-8'))
 222             except UnicodeError:
 223                 pass
 224         if length > max_line_length:
 225             return (max_line_length, "E501 line too long "
 226                     "(%d > %d characters)" % (length, max_line_length))
 227
 228
 229 ##############################################################################
 230 # Plugins (check functions) for logical lines
 231 ##############################################################################
 232
 233
 234 def blank_lines(logical_line, blank_lines, indent_level, line_number,
 235                 blank_before, previous_logical, previous_indent_level):
 236     r"""Separate top-level function and class definitions with two blank lines.
 237
 238     Method definitions inside a class are separated by a single blank line.
 239
 240     Extra blank lines may be used (sparingly) to separate groups of related
 241     functions.  Blank lines may be omitted between a bunch of related
 242     one-liners (e.g. a set of dummy implementations).
 243
 244     Use blank lines in functions, sparingly, to indicate logical sections.
 245
 246     Okay: def a():\n    pass\n\n\ndef b():\n    pass
 247     Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
 248
 249     E301: class Foo:\n    b = 0\n    def bar():\n        pass
 250     E302: def a():\n    pass\n\ndef b(n):\n    pass
 251     E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
 252     E303: def a():\n\n\n\n    pass
 253     E304: @decorator\n\ndef a():\n    pass
 254     """
 255     if line_number < 3 and not previous_logical:
 256         return  # Don't expect blank lines before the first line
 257     if previous_logical.startswith('@'):
 258         if blank_lines:
 259             yield 0, "E304 blank lines found after function decorator"
 260     elif blank_lines > 2 or (indent_level and blank_lines == 2):
 261         yield 0, "E303 too many blank lines (%d)" % blank_lines
 262     elif logical_line.startswith(('def ', 'class ', '@')):
 263         if indent_level:
 264             if not (blank_before or previous_indent_level < indent_level or
 265                     DOCSTRING_REGEX.match(previous_logical)):
 266                 yield 0, "E301 expected 1 blank line, found 0"
 267         elif blank_before != 2:
 268             yield 0, "E302 expected 2 blank lines, found %d" % blank_before
 269
 270
 271 def extraneous_whitespace(logical_line):
 272     r"""Avoid extraneous whitespace.
 273
 274     Avoid extraneous whitespace in these situations:
 275     - Immediately inside parentheses, brackets or braces.
 276     - Immediately before a comma, semicolon, or colon.
 277
 278     Okay: spam(ham[1], {eggs: 2})
 279     E201: spam( ham[1], {eggs: 2})
 280     E201: spam(ham[ 1], {eggs: 2})
 281     E201: spam(ham[1], { eggs: 2})
 282     E202: spam(ham[1], {eggs: 2} )
 283     E202: spam(ham[1 ], {eggs: 2})
 284     E202: spam(ham[1], {eggs: 2 })
 285
 286     E203: if x == 4: print x, y; x, y = y , x
 287     E203: if x == 4: print x, y ; x, y = y, x
 288     E203: if x == 4 : print x, y; x, y = y, x
 289     """
 290     line = logical_line
 291     for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
 292         text = match.group()
 293         char = text.strip()
 294         found = match.start()
 295         if text == char + ' ':
 296             # assert char in '([{'
 297             yield found + 1, "E201 whitespace after '%s'" % char
 298         elif line[found - 1] != ',':
 299             code = ('E202' if char in '}])' else 'E203')  # if char in ',;:'
 300             yield found, "%s whitespace before '%s'" % (code, char)
 301
 302
 303 def whitespace_around_keywords(logical_line):
 304     r"""Avoid extraneous whitespace around keywords.
 305
 306     Okay: True and False
 307     E271: True and  False
 308     E272: True  and False
 309     E273: True and\tFalse
 310     E274: True\tand False
 311     """
 312     for match in KEYWORD_REGEX.finditer(logical_line):
 313         before, after = match.groups()
 314
 315         if '\t' in before:
 316             yield match.start(1), "E274 tab before keyword"
 317         elif len(before) > 1:
 318             yield match.start(1), "E272 multiple spaces before keyword"
 319
 320         if '\t' in after:
 321             yield match.start(2), "E273 tab after keyword"
 322         elif len(after) > 1:
 323             yield match.start(2), "E271 multiple spaces after keyword"
 324
 325
 326 def missing_whitespace(logical_line):
 327     r"""Each comma, semicolon or colon should be followed by whitespace.
 328
 329     Okay: [a, b]
 330     Okay: (3,)
 331     Okay: a[1:4]
 332     Okay: a[:4]
 333     Okay: a[1:]
 334     Okay: a[1:4:2]
 335     E231: ['a','b']
 336     E231: foo(bar,baz)
 337     E231: [{'a':'b'}]
 338     """
 339     line = logical_line
 340     for index in range(len(line) - 1):
 341         char = line[index]
 342         if char in ',;:' and line[index + 1] not in WHITESPACE:
 343             before = line[:index]
 344             if char == ':' and before.count('[') > before.count(']') and \
 345                     before.rfind('{') < before.rfind('['):
 346                 continue  # Slice syntax, no space required
 347             if char == ',' and line[index + 1] == ')':
 348                 continue  # Allow tuple with only one element: (3,)
 349             yield index, "E231 missing whitespace after '%s'" % char
 350
 351
 352 def indentation(logical_line, previous_logical, indent_char,
 353                 indent_level, previous_indent_level):
 354     r"""Use 4 spaces per indentation level.
 355
 356     For really old code that you don't want to mess up, you can continue to
 357     use 8-space tabs.
 358
 359     Okay: a = 1
 360     Okay: if a == 0:\n    a = 1
 361     E111:   a = 1
 362     E114:   # a = 1
 363
 364     Okay: for item in items:\n    pass
 365     E112: for item in items:\npass
 366     E115: for item in items:\n# Hi\n    pass
 367
 368     Okay: a = 1\nb = 2
 369     E113: a = 1\n    b = 2
 370     E116: a = 1\n    # b = 2
 371     """
 372     c = 0 if logical_line else 3
 373     tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
 374     if indent_level % 4:
 375         yield 0, tmpl % (1 + c, "indentation is not a multiple of four")
 376     indent_expect = previous_logical.endswith(':')
 377     if indent_expect and indent_level <= previous_indent_level:
 378         yield 0, tmpl % (2 + c, "expected an indented block")
 379     elif not indent_expect and indent_level > previous_indent_level:
 380         yield 0, tmpl % (3 + c, "unexpected indentation")
 381
 382
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    # Overview (read together with the docstring above):
    # - tokens is a sequence of 5-item entries unpacked below as
    #   (token_type, text, start, end, line); start and end are indexed as
    #   (row, column).
    # - Problems are yielded as (position, message) pairs, where position is
    #   a token's start or, for the final check, a (row, column) pair.
    # - Nothing is reported when noqa is truthy or when the logical line
    #   sits on a single physical row.
    # - With verbose >= 3 the physical lines are printed, with verbose >= 4
    #   the bracket bookkeeping as well.
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    # row is the 0-based physical row inside this logical line, depth the
    # current bracket nesting level.
    row = depth = 0
    # a hang of 8 is accepted as well when the indent character is a tab
    valid_hangs = (4,) if indent_char != '\t' else (4, 8)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents
    # (maps a column to True, to the str type, or to a token's text; see the
    # three places below where entries are stored)
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        # True when this token starts on a later physical row than the one
        # tracked so far.  For the very first token the difference is 0, so
        # last_token_multiline (first assigned at the bottom of this loop)
        # is never read before it has a value.
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            # A row reached through a multi-line token, or one whose first
            # token is NL/NEWLINE, is not treated as a continuation line.
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            # (rows are tried from the most recently recorded backwards;
            # hang keeps the value of the last row tried)
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            # once a hang was memorized for this depth, later lines have to
            # repeat exactly that hang
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            # (the result is falsy, True, the str type or a token text)
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and rel_indent[row] == 8):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    # memorize the hang so that following lines at this
                    # depth are compared against it
                    hangs[depth] = hang
                    if hang > 4:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        # (the first token other than NL/COMMENT seen after a bracket opened
        # on this row fixes the visual indent column of the current depth)
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        # a ':' with only whitespace after it on its line makes this row a
        # further opening row for the current depth
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                # drop the alignment chances at or to the right of it
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # give back one opened bracket to the nearest row, going
                # upwards from the current one, that still has one counted
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow to line up tokens
                indent_chances[start[1]] = text

        # the row on which a multi-line token ends inherits the relative
        # indent of the row on which the token started
        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    # After the loop, line and start belong to the last token: a logical
    # line ending in ':' whose last physical line is indented exactly 4 more
    # than the logical line itself gets one final report.
    if indent_next and expand_indent(line) == indent_level + 4:
        pos = (start[0], indent[0] + 4)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
 578
 579
 580 def whitespace_before_parameters(logical_line, tokens):
 581     r"""Avoid extraneous whitespace.
 582
 583     Avoid extraneous whitespace in the following situations:
 584     - before the open parenthesis that starts the argument list of a
 585       function call.
 586     - before the open parenthesis that starts an indexing or slicing.
 587
 588     Okay: spam(1)
 589     E211: spam (1)
 590
 591     Okay: dict['key'] = list[index]
 592     E211: dict ['key'] = list[index]
 593     E211: dict['key'] = list [index]
 594     """
 595     prev_type, prev_text, __, prev_end, __ = tokens[0]
 596     for index in range(1, len(tokens)):
 597         token_type, text, start, end, __ = tokens[index]
 598         if (token_type == tokenize.OP and
 599             text in '([' and
 600             start != prev_end and
 601             (prev_type == tokenize.NAME or prev_text in '}])') and
 602             # Syntax "class A (B):" is allowed, but avoid it
 603             (index < 2 or tokens[index - 2][1] != 'class') and
 604                 # Allow "return (a.foo for a in range(5))"
 605                 not keyword.iskeyword(prev_text)):
 606             yield prev_end, "E211 whitespace before '%s'" % text
 607         prev_type = token_type
 608         prev_text = text
 609         prev_end = end
 610
 611
 612 def whitespace_around_operator(logical_line):
 613     r"""Avoid extraneous whitespace around an operator.
 614
 615     Okay: a = 12 + 3
 616     E221: a = 4  + 5
 617     E222: a = 4 +  5
 618     E223: a = 4\t+ 5
 619     E224: a = 4 +\t5
 620     """
 621     for match in OPERATOR_REGEX.finditer(logical_line):
 622         before, after = match.groups()
 623
 624         if '\t' in before:
 625             yield match.start(1), "E223 tab before operator"
 626         elif len(before) > 1:
 627             yield match.start(1), "E221 multiple spaces before operator"
 628
 629         if '\t' in after:
 630             yield match.start(2), "E224 tab after operator"
 631         elif len(after) > 1:
 632             yield match.start(2), "E222 multiple spaces after operator"
 633
 634
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # tokens entries are unpacked as (token_type, text, start, end, line);
    # problems are yielded as (position, message) pairs.
    #
    # need_space carries the state from an operator token to the token that
    # follows it:
    #   False           - no operator is waiting for its trailing space
    #   True            - the operator must have a space on both sides
    #   None            - short-lived marker meaning "space is optional",
    #                     replaced by the tuple form before the next token
    #   (pos, spaced)   - optional-space operator; pos is where the token
    #                     before it ended, spaced tells whether a gap
    #                     preceded the operator
    #
    # parens goes up on '(' and on 'lambda' and down on ')'; while it is
    # non-zero a bare '=' is accepted without surrounding spaces.
    # NOTE(review): nothing brings the counter back down after a 'lambda'.
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        # skipped tokens do not update prev_type / prev_text / prev_end
        if token_type in SKIP_COMMENTS:
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            # the previous token was an operator; this token shows whether
            # a gap follows it
            if start != prev_end:
                # Found a (probably) needed space
                # (an optional-space operator that had no gap before it is
                # spaced inconsistently)
                if need_space is not True and not need_space[1]:
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                elif prev_text != '**':
                    # no gap on either side of an optional-space operator:
                    # the code depends on the operator family
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is being used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                # (binary when it follows a closing bracket or, for a
                # non-operator predecessor, anything that is not a keyword)
                if (prev_text in '}])' if prev_type == tokenize.OP
                        else prev_text not in KEYWORDS):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
 725
 726
 727 def whitespace_around_comma(logical_line):
 728     r"""Avoid extraneous whitespace after a comma or a colon.
 729
 730     Note: these checks are disabled by default
 731
 732     Okay: a = (1, 2)
 733     E241: a = (1,  2)
 734     E242: a = (1,\t2)
 735     """
 736     line = logical_line
 737     for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
 738         found = m.start() + 1
 739         if '\t' in m.group():
 740             yield found, "E242 tab after '%s'" % m.group()[0]
 741         else:
 742             yield found, "E241 multiple spaces after '%s'" % m.group()[0]
 743
 744
 745 def whitespace_around_named_parameter_equals(logical_line, tokens):
 746     r"""Don't use spaces around the '=' sign in function arguments.
 747
 748     Don't use spaces around the '=' sign when used to indicate a
 749     keyword argument or a default parameter value.
 750
 751     Okay: def complex(real, imag=0.0):
 752     Okay: return magic(r=real, i=imag)
 753     Okay: boolean(a == b)
 754     Okay: boolean(a != b)
 755     Okay: boolean(a <= b)
 756     Okay: boolean(a >= b)
 757
 758     E251: def complex(real, imag = 0.0):
 759     E251: return magic(r = real, i = imag)
 760     """
 761     parens = 0
 762     no_space = False
 763     prev_end = None
 764     message = "E251 unexpected spaces around keyword / parameter equals"
 765     for token_type, text, start, end, line in tokens:
 766         if token_type == tokenize.NL:
 767             continue
 768         if no_space:
 769             no_space = False
 770             if start != prev_end:
 771                 yield (prev_end, message)
 772         if token_type == tokenize.OP:
 773             if text == '(':
 774                 parens += 1
 775             elif text == ')':
 776                 parens -= 1
 777             elif parens and text == '=':
 778                 no_space = True
 779                 if start != prev_end:
 780                     yield (prev_end, message)
 781         prev_end = end
 782
 783
 784 def whitespace_before_comment(logical_line, tokens):
 785     r"""Separate inline comments by at least two spaces.
 786
 787     An inline comment is a comment on the same line as a statement.  Inline
 788     comments should be separated by at least two spaces from the statement.
 789     They should start with a # and a single space.
 790
 791     Each line of a block comment starts with a # and a single space
 792     (unless it is indented text inside the comment).
 793
 794     Okay: x = x + 1  # Increment x
 795     Okay: x = x + 1    # Increment x
 796     Okay: # Block comment
 797     E261: x = x + 1 # Increment x
 798     E262: x = x + 1  #Increment x
 799     E262: x = x + 1  #  Increment x
 800     E265: #Block comment
 801     E266: ### Block comment
 802     """
 803     prev_end = (0, 0)
 804     for token_type, text, start, end, line in tokens:
 805         if token_type == tokenize.COMMENT:
 806             inline_comment = line[:start[1]].strip()
 807             if inline_comment:
 808                 if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
 809                     yield (prev_end,
 810                            "E261 at least two spaces before inline comment")
 811             symbol, sp, comment = text.partition(' ')
 812             bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
 813             if inline_comment:
 814                 if bad_prefix or comment[:1] in WHITESPACE:
 815                     yield start, "E262 inline comment should start with '# '"
 816             elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
 817                 if bad_prefix != '#':
 818                     yield start, "E265 block comment should start with '# '"
 819                 elif comment:
 820                     yield start, "E266 too many leading '#' for block comment"
 821         elif token_type != tokenize.NL:
 822             prev_end = end
 823
 824
 825 def imports_on_separate_lines(logical_line):
 826     r"""Imports should usually be on separate lines.
 827
 828     Okay: import os\nimport sys
 829     E401: import sys, os
 830
 831     Okay: from subprocess import Popen, PIPE
 832     Okay: from myclas import MyClass
 833     Okay: from foo.bar.yourclass import YourClass
 834     Okay: import myclass
 835     Okay: import foo.bar.yourclass
 836     """
 837     line = logical_line
 838     if line.startswith('import '):
 839         found = line.find(',')
 840         if -1 < found and ';' not in line[:found]:
 841             yield found, "E401 multiple imports on one line"
 842
 843
 844 def module_imports_on_top_of_file(
 845         logical_line, indent_level, checker_state, noqa):
 846     r"""Imports are always put at the top of the file, just after any module
 847     comments and docstrings, and before module globals and constants.
 848
 849     Okay: import os
 850     Okay: # this is a comment\nimport os
 851     Okay: '''this is a module docstring'''\nimport os
 852     Okay: r'''this is a module docstring'''\nimport os
 853     Okay: try:\n    import x\nexcept:\n    pass\nelse:\n    pass\nimport y
 854     Okay: try:\n    import x\nexcept:\n    pass\nfinally:\n    pass\nimport y
 855     E402: a=1\nimport os
 856     E402: 'One string'\n"Two string"\nimport os
 857     E402: a=1\nfrom sys import x
 858
 859     Okay: if x:\n    import os
 860     """
 861     def is_string_literal(line):
 862         if line[0] in 'uUbB':
 863             line = line[1:]
 864         if line and line[0] in 'rR':
 865             line = line[1:]
 866         return line and (line[0] == '"' or line[0] == "'")
 867
 868     allowed_try_keywords = ('try', 'except', 'else', 'finally')
 869
 870     if indent_level:  # Allow imports in conditional statements or functions
 871         return
 872     if not logical_line:  # Allow empty lines or comments
 873         return
 874     if noqa:
 875         return
 876     line = logical_line
 877     if line.startswith('import ') or line.startswith('from '):
 878         if checker_state.get('seen_non_imports', False):
 879             yield 0, "E402 import not at top of file"
 880     elif any(line.startswith(kw) for kw in allowed_try_keywords):
 881         # Allow try, except, else, finally keywords intermixed with imports in
 882         # order to support conditional importing
 883         return
 884     elif is_string_literal(line):
 885         # The first literal is a docstring, allow it. Otherwise, report error.
 886         if checker_state.get('seen_docstring', False):
 887             checker_state['seen_non_imports'] = True
 888         else:
 889             checker_state['seen_docstring'] = True
 890     else:
 891         checker_state['seen_non_imports'] = True
 892
 893
 894 def compound_statements(logical_line):
 895     r"""Compound statements (on the same line) are generally discouraged.
 896
 897     While sometimes it's okay to put an if/for/while with a small body
 898     on the same line, never do this for multi-clause statements.
 899     Also avoid folding such long lines!
 900
 901     Always use a def statement instead of an assignment statement that
 902     binds a lambda expression directly to a name.
 903
 904     Okay: if foo == 'blah':\n    do_blah_thing()
 905     Okay: do_one()
 906     Okay: do_two()
 907     Okay: do_three()
 908
 909     E701: if foo == 'blah': do_blah_thing()
 910     E701: for x in lst: total += x
 911     E701: while t < 10: t = delay()
 912     E701: if foo == 'blah': do_blah_thing()
 913     E701: else: do_non_blah_thing()
 914     E701: try: something()
 915     E701: finally: cleanup()
 916     E701: if foo == 'blah': one(); two(); three()
 917     E702: do_one(); do_two(); do_three()
 918     E703: do_four();  # useless semicolon
 919     E704: def f(x): return 2*x
 920     E731: f = lambda x: 2*x
 921     """
 922     line = logical_line
 923     last_char = len(line) - 1
 924     found = line.find(':')
 925     while -1 < found < last_char:
 926         before = line[:found]
 927         if ((before.count('{') <= before.count('}') and   # {'a': 1} (dict)
 928              before.count('[') <= before.count(']') and   # [1:2] (slice)
 929              before.count('(') <= before.count(')'))):    # (annotation)
 930             lambda_kw = LAMBDA_REGEX.search(before)
 931             if lambda_kw:
 932                 before = line[:lambda_kw.start()].rstrip()
 933                 if before[-1:] == '=' and isidentifier(before[:-1].strip()):
 934                     yield 0, ("E731 do not assign a lambda expression, use a "
 935                               "def")
 936                 break
 937             if before.startswith('def '):
 938                 yield 0, "E704 multiple statements on one line (def)"
 939             else:
 940                 yield found, "E701 multiple statements on one line (colon)"
 941         found = line.find(':', found + 1)
 942     found = line.find(';')
 943     while -1 < found:
 944         if found < last_char:
 945             yield found, "E702 multiple statements on one line (semicolon)"
 946         else:
 947             yield found, "E703 statement ends with a semicolon"
 948         found = line.find(';', found + 1)
 949
 950
 951 def explicit_line_join(logical_line, tokens):
 952     r"""Avoid explicit line join between brackets.
 953
 954     The preferred way of wrapping long lines is by using Python's implied line
 955     continuation inside parentheses, brackets and braces.  Long lines can be
 956     broken over multiple lines by wrapping expressions in parentheses.  These
 957     should be used in preference to using a backslash for line continuation.
 958
 959     E502: aaa = [123, \\n       123]
 960     E502: aaa = ("bbb " \\n       "ccc")
 961
 962     Okay: aaa = [123,\n       123]
 963     Okay: aaa = ("bbb "\n       "ccc")
 964     Okay: aaa = "bbb " \\n    "ccc"
 965     """
 966     prev_start = prev_end = parens = 0
 967     for token_type, text, start, end, line in tokens:
 968         if start[0] != prev_start and parens and backslash:
 969             yield backslash, "E502 the backslash is redundant between brackets"
 970         if end[0] != prev_end:
 971             if line.rstrip('\r\n').endswith('\\'):
 972                 backslash = (end[0], len(line.splitlines()[-1]) - 1)
 973             else:
 974                 backslash = None
 975             prev_start = prev_end = end[0]
 976         else:
 977             prev_start = start[0]
 978         if token_type == tokenize.OP:
 979             if text in '([{':
 980                 parens += 1
 981             elif text in ')]}':
 982                 parens -= 1
 983
 984
 985 def comparison_to_singleton(logical_line, noqa):
 986     r"""Comparison to singletons should use "is" or "is not".
 987
 988     Comparisons to singletons like None should always be done
 989     with "is" or "is not", never the equality operators.
 990
 991     Okay: if arg is not None:
 992     E711: if arg != None:
 993     E711: if None == arg:
 994     E712: if arg == True:
 995     E712: if False == arg:
 996
 997     Also, beware of writing if x when you really mean if x is not None --
 998     e.g. when testing whether a variable or argument that defaults to None was
 999     set to some other value.  The other value might have a type (such as a
1000     container) that could be false in a boolean context!
1001     """
1002     match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
1003     if match:
1004         singleton = match.group(1) or match.group(3)
1005         same = (match.group(2) == '==')
1006
1007         msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
1008         if singleton in ('None',):
1009             code = 'E711'
1010         else:
1011             code = 'E712'
1012             nonzero = ((singleton == 'True' and same) or
1013                        (singleton == 'False' and not same))
1014             msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
1015         yield match.start(2), ("%s comparison to %s should be %s" %
1016                                (code, singleton, msg))
1017
1018
def comparison_negative(logical_line):
    r"""Prefer "not in" and "is not" over negating "in" and "is".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    found = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if found is None:
        return
    # Group 2 holds the operator being negated; group 1 marks the spot
    # that gets reported.
    if found.group(2) == 'in':
        message = "E713 test for membership should be 'not in'"
    else:
        message = "E714 test for object identity should be 'is not'"
    yield found.start(1), message
1038
1039
def comparison_type(logical_line):
    r"""Type checks should go through isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might be a
    unicode string too! In Python 2.3, str and unicode have a common base
    class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    found = COMPARE_TYPE_REGEX.search(logical_line)
    if not found:
        return
    operand = found.group(1)
    # A comparison against the type of a plain identifier (other than a
    # singleton) is tolerated: its type is not obvious from the source.
    tolerated = (operand and isidentifier(operand) and
                 operand not in SINGLETONS)
    if not tolerated:
        yield found.start(), "E721 do not compare types, use 'isinstance()'"
1061
1062
def python_3000_has_key(logical_line, noqa):
    r"""Python 3 drops {}.has_key(): use the 'in' operator instead.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    if noqa:
        return
    index = logical_line.find('.has_key(')
    if index != -1:
        yield index, "W601 .has_key() is deprecated, use 'in'"
1072
1073
def python_3000_raise_comma(logical_line):
    r"""Raise exceptions with the call form: raise ValueError('message').

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    found = RAISE_COMMA_REGEX.match(logical_line)
    if not found:
        return
    # Lines that also match the re-raise pattern are not reported.
    if RERAISE_COMMA_REGEX.match(logical_line):
        return
    yield found.end() - 1, "W602 deprecated form of raising exception"
1085
1086
def python_3000_not_equal(logical_line):
    r"""Use != for inequality; the <> spelling is gone in Python 3.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    if '<>' in logical_line:
        # Only the first occurrence on the line is reported.
        yield logical_line.index('<>'), "W603 '<>' is deprecated, use '!='"
1098
1099
def python_3000_backticks(logical_line):
    r"""Use repr() rather than backticks, which Python 3 removes.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    if '`' in logical_line:
        # Only the first backtick on the line is reported.
        yield (logical_line.index('`'),
               "W604 backticks are deprecated, use 'repr()'")
1109
1110
1111 ##############################################################################
1112 # Helper functions
1113 ##############################################################################
1114
1115
if '' == ''.encode():
    # Python 2: str is a byte string, so no explicit decoding is done.
    def readlines(filename):
        """Return the lines of *filename*, opened in 'rU' mode."""
        with open(filename, 'rU') as source:
            return source.readlines()
    isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3: decode the file with the encoding detected by tokenize.
    def readlines(filename):
        """Return the decoded lines of *filename*.

        The encoding is detected with tokenize.detect_encoding(); when
        that fails or the content cannot be decoded, the file is read
        again as latin-1.
        """
        try:
            with open(filename, 'rb') as binary:
                # detect_encoding() consumes the first line(s); they are
                # decoded by hand and the rest is read through a wrapper.
                (coding, head) = tokenize.detect_encoding(binary.readline)
                rest = TextIOWrapper(binary, coding, line_buffering=True)
                return [raw.decode(coding) for raw in head] + rest.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as source:
                return source.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        """Return all of standard input, ignoring decoding errors."""
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
# Search function for a "# noqa" / "# nopep8" marker (case-insensitive).
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1142
1143
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.  A trailing line ending
    is not part of the indentation, so whitespace-only lines are measured
    without their newline.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    >>> expand_indent('    \n')
    4
    >>> expand_indent('\n')
    0
    """
    # Drop the line ending first: str.lstrip() below would otherwise count
    # it as leading whitespace on blank or whitespace-only lines.
    line = line.rstrip('\n\r')
    if '\t' not in line:
        return len(line) - len(line.lstrip())
    result = 0
    for char in line:
        if char == '\t':
            # Advance to the next tab stop.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
1169
1170
def mute_string(text):
    """Blank out a string literal's contents with 'x' characters.

    The prefix and the quotes are kept, so the result has the same length
    as the input but cannot trigger syntax-based checks.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    quote = text[-1]
    # Triple-quoted literals have three quote characters on each side.
    width = 3 if text.endswith(('"""', "'''")) else 1
    # Anything before the first quote is a string modifier (e.g. u or r).
    body_start = text.index(quote) + width
    body_end = len(text) - width
    filler = 'x' * (body_end - body_start)
    return ''.join((text[:body_start], filler, text[body_end:]))
1189
1190
def parse_udiff(diff, patterns=None, parent='.'):
    """Map each file of a unified diff to the row numbers of its hunks.

    Keys are the diff's file names joined onto *parent*; values are sets
    of row numbers.  Files without any rows, or whose name is rejected by
    filename_match() for *patterns*, are left out.
    """
    selected = {}
    current_path = remaining = None
    for text in diff.splitlines():
        if remaining:
            # Inside a hunk body: every line except a removal ('-') uses
            # up one of the rows announced by the hunk header.
            if not text.startswith('-'):
                remaining -= 1
            continue
        if text.startswith('@@ '):
            hunk = HUNK_REGEX.match(text)
            # A missing group in the hunk header stands for 1.
            (first_row, remaining) = [int(group or '1')
                                      for group in hunk.groups()]
            selected[current_path].update(
                range(first_row, first_row + remaining))
        elif text.startswith('+++'):
            # The file name ends at the first tab, if any.
            current_path = text[4:].split('\t', 1)[0]
            if current_path.startswith('b/'):
                current_path = current_path[2:]
            selected[current_path] = set()
    return dict((os.path.join(parent, name), rows)
                for (name, rows) in selected.items()
                if rows and filename_match(name, patterns))
1214
1215
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    A falsy *value* gives an empty list and a list is returned unchanged.
    Otherwise each entry is stripped of surrounding whitespace; entries
    containing a '/' are made absolute relative to *parent*, the others
    are kept as given.  Trailing slashes are removed.
    """
    if not value:
        return []
    if isinstance(value, list):
        return value
    normalized = []
    for entry in value.split(','):
        entry = entry.strip()
        if '/' in entry:
            resolved = os.path.abspath(os.path.join(parent, entry))
        else:
            resolved = entry
        normalized.append(resolved.rstrip('/'))
    return normalized
1232
1233
def filename_match(filename, patterns, default=True):
    """Tell whether *filename* matches at least one of *patterns*.

    When *patterns* is empty or None, *default* is returned instead.
    """
    if patterns:
        for pattern in patterns:
            if fnmatch(filename, pattern):
                return True
        return False
    return default
1242
1243
def _is_eol_token(token):
    """Tell whether *token* is the last one on its physical line."""
    if token[0] in NEWLINE:
        return True
    # Otherwise only a backslash continuation may follow the token.
    remainder = token[4][token[3][1]:]
    return remainder.lstrip() == '\\\n'
if COMMENT_WITH_NL:
    def _is_eol_token(token, _eol_token=_is_eol_token):
        """Same test, plus a comment token that spans its whole line."""
        if _eol_token(token):
            return True
        return token[0] == tokenize.COMMENT and token[1] == token[4]
1250
1251 ##############################################################################
1252 # Framework to run all checks
1253 ##############################################################################
1254
1255
# Registry of checks, grouped by kind.  Each kind maps a check object to a
# tuple (codes, argument_names).
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}


def _get_parameters(function):
    """Return the positional-or-keyword parameter names of *function*.

    inspect.getargspec() was removed in Python 3.11, so inspect.signature()
    is used whenever it exists; getargspec() remains the fallback for old
    interpreters that lack signature().
    """
    if hasattr(inspect, 'signature'):
        return [parameter.name
                for parameter
                in inspect.signature(function).parameters.values()
                if parameter.kind == parameter.POSITIONAL_OR_KEYWORD]
    return inspect.getargspec(function)[0]


def register_check(check, codes=None):
    """Register a new check object.

    A function is registered when its first parameter is named
    'physical_line' or 'logical_line'; if *codes* is None they are taken
    from the function's docstring.  A class is registered as a 'tree'
    check when its __init__ starts with the parameters (self, tree).
    Anything else is silently ignored.  Registering the same check again
    extends its list of codes.
    """
    def _add_check(check, kind, codes, args):
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    if inspect.isfunction(check):
        args = _get_parameters(check)
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
1275
1276
def init_checks_registry():
    """Offer every function of this module to register_check().

    register_check() keeps only those whose first argument name is
    either 'physical_line' or 'logical_line'.
    """
    module = inspect.getmodule(register_check)
    for (_name, member) in inspect.getmembers(module, inspect.isfunction):
        register_check(member)
init_checks_registry()
1286
1287
1288 class Checker(object):
1289     """Load a Python source file, tokenize it, check coding style."""
1290
1291     def __init__(self, filename=None, lines=None,
1292                  options=None, report=None, **kwargs):
1293         if options is None:
1294             options = StyleGuide(kwargs).options
1295         else:
1296             assert not kwargs
1297         self._io_error = None
1298         self._physical_checks = options.physical_checks
1299         self._logical_checks = options.logical_checks
1300         self._ast_checks = options.ast_checks
1301         self.max_line_length = options.max_line_length
1302         self.multiline = False  # in a multiline string?
1303         self.hang_closing = options.hang_closing
1304         self.verbose = options.verbose
1305         self.filename = filename
1306         # Dictionary where a checker can store its custom state.
1307         self._checker_states = {}
1308         if filename is None:
1309             self.filename = 'stdin'
1310             self.lines = lines or []
1311         elif filename == '-':
1312             self.filename = 'stdin'
1313             self.lines = stdin_get_value().splitlines(True)
1314         elif lines is None:
1315             try:
1316                 self.lines = readlines(filename)
1317             except IOError:
1318                 (exc_type, exc) = sys.exc_info()[:2]
1319                 self._io_error = '%s: %s' % (exc_type.__name__, exc)
1320                 self.lines = []
1321         else:
1322             self.lines = lines
1323         if self.lines:
1324             ord0 = ord(self.lines[0][0])
1325             if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
1326                 if ord0 == 0xfeff:
1327                     self.lines[0] = self.lines[0][1:]
1328                 elif self.lines[0][:3] == '\xef\xbb\xbf':
1329                     self.lines[0] = self.lines[0][3:]
1330         self.report = report or options.report
1331         self.report_error = self.report.error
1332
1333     def report_invalid_syntax(self):
1334         """Check if the syntax is valid."""
1335         (exc_type, exc) = sys.exc_info()[:2]
1336         if len(exc.args) > 1:
1337             offset = exc.args[1]
1338             if len(offset) > 2:
1339                 offset = offset[1:3]
1340         else:
1341             offset = (1, 0)
1342         self.report_error(offset[0], offset[1] or 0,
1343                           'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
1344                           self.report_invalid_syntax)
1345
1346     def readline(self):
1347         """Get the next line from the input buffer."""
1348         if self.line_number >= self.total_lines:
1349             return ''
1350         line = self.lines[self.line_number]
1351         self.line_number += 1
1352         if self.indent_char is None and line[:1] in WHITESPACE:
1353             self.indent_char = line[0]
1354         return line
1355
1356     def run_check(self, check, argument_names):
1357         """Run a check plugin."""
1358         arguments = []
1359         for name in argument_names:
1360             arguments.append(getattr(self, name))
1361         return check(*arguments)
1362
1363     def init_checker_state(self, name, argument_names):
1364         """ Prepares a custom state for the specific checker plugin."""
1365         if 'checker_state' in argument_names:
1366             self.checker_state = self._checker_states.setdefault(name, {})
1367
1368     def check_physical(self, line):
1369         """Run all physical checks on a raw input line."""
1370         self.physical_line = line
1371         for name, check, argument_names in self._physical_checks:
1372             self.init_checker_state(name, argument_names)
1373             result = self.run_check(check, argument_names)
1374             if result is not None:
1375                 (offset, text) = result
1376                 self.report_error(self.line_number, offset, text, check)
1377                 if text[:4] == 'E101':
1378                     self.indent_char = line[0]
1379
1380     def build_tokens_line(self):
1381         """Build a logical line from tokens."""
1382         logical = []
1383         comments = []
1384         length = 0
1385         prev_row = prev_col = mapping = None
1386         for token_type, text, start, end, line in self.tokens:
1387             if token_type in SKIP_TOKENS:
1388                 continue
1389             if not mapping:
1390                 mapping = [(0, start)]
1391             if token_type == tokenize.COMMENT:
1392                 comments.append(text)
1393                 continue
1394             if token_type == tokenize.STRING:
1395                 text = mute_string(text)
1396             if prev_row:
1397                 (start_row, start_col) = start
1398                 if prev_row != start_row:    # different row
1399                     prev_text = self.lines[prev_row - 1][prev_col - 1]
1400                     if prev_text == ',' or (prev_text not in '{[('
1401                                             and text not in '}])'):
1402                         text = ' ' + text
1403                 elif prev_col != start_col:  # different column
1404                     text = line[prev_col:start_col] + text
1405             logical.append(text)
1406             length += len(text)
1407             mapping.append((length, end))
1408             (prev_row, prev_col) = end
1409         self.logical_line = ''.join(logical)
1410         self.noqa = comments and noqa(''.join(comments))
1411         return mapping
1412
1413     def check_logical(self):
1414         """Build a line from tokens and run all logical checks on it."""
1415         self.report.increment_logical_line()
1416         mapping = self.build_tokens_line()
1417
1418         if not mapping:
1419             return
1420
1421         (start_row, start_col) = mapping[0][1]
1422         start_line = self.lines[start_row - 1]
1423         self.indent_level = expand_indent(start_line[:start_col])
1424         if self.blank_before < self.blank_lines:
1425             self.blank_before = self.blank_lines
1426         if self.verbose >= 2:
1427             print(self.logical_line[:80].rstrip())
1428         for name, check, argument_names in self._logical_checks:
1429             if self.verbose >= 4:
1430                 print('   ' + name)
1431             self.init_checker_state(name, argument_names)
1432             for offset, text in self.run_check(check, argument_names) or ():
1433                 if not isinstance(offset, tuple):
1434                     for token_offset, pos in mapping:
1435                         if offset <= token_offset:
1436                             break
1437                     offset = (pos[0], pos[1] + offset - token_offset)
1438                 self.report_error(offset[0], offset[1], text, check)
1439         if self.logical_line:
1440             self.previous_indent_level = self.indent_level
1441             self.previous_logical = self.logical_line
1442         self.blank_lines = 0
1443         self.tokens = []
1444
1445     def check_ast(self):
1446         """Build the file's AST and run all AST checks."""
1447         try:
1448             tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
1449         except (SyntaxError, TypeError):
1450             return self.report_invalid_syntax()
1451         for name, cls, __ in self._ast_checks:
1452             checker = cls(tree, self.filename)
1453             for lineno, offset, text, check in checker.run():
1454                 if not self.lines or not noqa(self.lines[lineno - 1]):
1455                     self.report_error(lineno, offset, text, check)
1456
1457     def generate_tokens(self):
1458         """Tokenize the file, run physical line checks and yield tokens."""
1459         if self._io_error:
1460             self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
1461         tokengen = tokenize.generate_tokens(self.readline)
1462         try:
1463             for token in tokengen:
1464                 if token[2][0] > self.total_lines:
1465                     return
1466                 self.maybe_check_physical(token)
1467                 yield token
1468         except (SyntaxError, tokenize.TokenError):
1469             self.report_invalid_syntax()
1470
1471     def maybe_check_physical(self, token):
1472         """If appropriate (based on token), check current physical line(s)."""
1473         # Called after every token, but act only on end of line.
1474         if _is_eol_token(token):
1475             # Obviously, a newline token ends a single physical line.
1476             self.check_physical(token[4])
1477         elif token[0] == tokenize.STRING and '\n' in token[1]:
1478             # Less obviously, a string that contains newlines is a
1479             # multiline string, either triple-quoted or with internal
1480             # newlines backslash-escaped. Check every physical line in the
1481             # string *except* for the last one: its newline is outside of
1482             # the multiline string, so we consider it a regular physical
1483             # line, and will check it like any other physical line.
1484             #
1485             # Subtleties:
1486             # - we don't *completely* ignore the last line; if it contains
1487             #   the magical "# noqa" comment, we disable all physical
1488             #   checks for the entire multiline string
1489             # - have to wind self.line_number back because initially it
1490             #   points to the last line of the string, and we want
1491             #   check_physical() to give accurate feedback
1492             if noqa(token[4]):
1493                 return
1494             self.multiline = True
1495             self.line_number = token[2][0]
1496             for line in token[1].split('\n')[:-1]:
1497                 self.check_physical(line + '\n')
1498                 self.line_number += 1
1499             self.multiline = False
1500
1501     def check_all(self, expected=None, line_offset=0):
1502         """Run all checks on the input file."""
1503         self.report.init_file(self.filename, self.lines, expected, line_offset)
1504         self.total_lines = len(self.lines)
1505         if self._ast_checks:
1506             self.check_ast()
1507         self.line_number = 0
1508         self.indent_char = None
1509         self.indent_level = self.previous_indent_level = 0
1510         self.previous_logical = ''
1511         self.tokens = []
1512         self.blank_lines = self.blank_before = 0
1513         parens = 0
1514         for token in self.generate_tokens():
1515             self.tokens.append(token)
1516             token_type, text = token[0:2]
1517             if self.verbose >= 3:
1518                 if token[2][0] == token[3][0]:
1519                     pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
1520                 else:
1521                     pos = 'l.%s' % token[3][0]
1522                 print('l.%s\t%s\t%s\t%r' %
1523                       (token[2][0], pos, tokenize.tok_name[token[0]], text))
1524             if token_type == tokenize.OP:
1525                 if text in '([{':
1526                     parens += 1
1527                 elif text in '}])':
1528                     parens -= 1
1529             elif not parens:
1530                 if token_type in NEWLINE:
1531                     if token_type == tokenize.NEWLINE:
1532                         self.check_logical()
1533                         self.blank_before = 0
1534                     elif len(self.tokens) == 1:
1535                         # The physical line contains only this token.
1536                         self.blank_lines += 1
1537                         del self.tokens[0]
1538                     else:
1539                         self.check_logical()
1540                 elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
1541                     if len(self.tokens) == 1:
1542                         # The comment also ends a physical line
1543                         token = list(token)
1544                         token[1] = text.rstrip('\r\n')
1545                         token[3] = (token[2][0], token[2][1] + len(token[1]))
1546                         self.tokens = [tuple(token)]
1547                         self.check_logical()
1548         if self.tokens:
1549             self.check_physical(self.lines[-1])
1550             self.check_logical()
1551         return self.report.get_file_results()
1552
1553
class BaseReport(object):
    """Accumulate the results of the checks without printing each error."""

    # When true, error() prints the file name once per file.
    print_filename = False

    def __init__(self, options):
        self._ignore_code = options.ignore_code
        self._benchmark_keys = options.benchmark_keys
        # Results
        self.messages = {}
        self.counters = dict((key, 0) for key in self._benchmark_keys)
        self.total_errors = 0
        self.elapsed = 0

    def start(self):
        """Remember the time at which checking began."""
        self._start_time = time.time()

    def stop(self):
        """Store the time elapsed since start() in ``self.elapsed``."""
        now = time.time()
        self.elapsed = now - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Prepare the report for a new file and update the counters."""
        self.filename = filename
        self.lines = lines
        self.line_offset = line_offset
        self.expected = expected or ()
        self.file_errors = 0
        self.counters['physical lines'] += len(lines)
        self.counters['files'] += 1

    def increment_logical_line(self):
        """Count one more logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Record an error unless its code is ignored.

        Returns the error code when the error counts against the file,
        and None when the code is ignored or listed as expected.
        """
        code = text[:4]
        if self._ignore_code(code):
            return None
        if code not in self.counters:
            # First occurrence: remember the message for the statistics.
            self.counters[code] = 1
            self.messages[code] = text[5:]
        else:
            self.counters[code] += 1
        # Expected errors or warnings are counted but not reported.
        if code in self.expected:
            return None
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the number of errors and warnings for the current file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total of all counted codes starting with *prefix*."""
        total = 0
        for code in self.messages:
            if code.startswith(prefix):
                total += self.counters[code]
        return total

    def get_statistics(self, prefix=''):
        """Return one formatted line per code that starts with *prefix*.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        rows = []
        for code in sorted(self.messages):
            if not code.startswith(prefix):
                continue
            rows.append('%-7s %s %s' %
                        (self.counters[code], code, self.messages[code]))
        return rows

    def print_statistics(self, prefix=''):
        """Print the statistics lines for *prefix*."""
        for row in self.get_statistics(prefix):
            print(row)

    def print_benchmark(self):
        """Print the elapsed time and the per-second benchmark rates."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if not self.elapsed:
            return
        for key in self._benchmark_keys:
            total = self.counters[key]
            print('%-7d %s per second (%d total)' %
                  (total / self.elapsed, key, total))
1642
1643
class FileReport(BaseReport):
    """Collect the results of the checks and print only the filenames."""
    # With this flag set, BaseReport.error() prints the file name the
    # first time an unexpected error is counted for that file.
    print_filename = True
1647
1648
class StandardReport(BaseReport):
    """Report that queues each error and prints the queue per file."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        self._show_pep8 = options.show_pep8
        self._show_source = options.show_source
        self._repeat = options.repeat
        # A known format name maps to its template; any other value is
        # used as the template itself.
        requested = options.format
        self._fmt = REPORT_FORMAT.get(requested.lower(), requested)

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file and start with an empty print queue."""
        self._deferred_print = []
        parent = super(StandardReport, self)
        return parent.init_file(filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Record an error and queue it for printing, per the options."""
        parent = super(StandardReport, self)
        code = parent.error(line_number, offset, text, check)
        if code:
            # Without the repeat option only the first occurrence of a
            # code is queued.
            if self.counters[code] == 1 or self._repeat:
                entry = (line_number, offset, code, text[5:], check.__doc__)
                self._deferred_print.append(entry)
        return code

    def get_file_results(self):
        """Print the queued errors and return this file's error count."""
        self._deferred_print.sort()
        for row, col, code, message, doc in self._deferred_print:
            values = {
                'path': self.filename,
                'row': self.line_offset + row, 'col': col + 1,
                'code': code, 'text': message,
            }
            print(self._fmt % values)
            if self._show_source:
                if row <= len(self.lines):
                    source = self.lines[row - 1]
                else:
                    source = ''
                print(source.rstrip())
                # Blank out everything but whitespace so the caret lines
                # up with the offending column.
                print(re.sub(r'\S', ' ', source[:col]) + '^')
            if self._show_pep8 and doc:
                print('    ' + doc.strip())
        return self.file_errors
1694
1695
class DiffReport(StandardReport):
    """Standard report restricted to the lines selected by a diff."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        # Looked up by file name in error() to get the selected lines.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        """Report the error only if its line is selected for this file."""
        changed_lines = self._selected[self.filename]
        if line_number in changed_lines:
            parent = super(DiffReport, self)
            return parent.error(line_number, offset, text, check)
        return None
1707
1708
class StyleGuide(object):
    """Set up the options and the report, then run checks on paths."""

    def __init__(self, *args, **kwargs):
        # Keywords consumed here; they never become options.
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        # Everything that is left overrides the parsed options.
        overrides = dict(*args, **kwargs)
        if parse_argv:
            arglist = None
        else:
            arglist = overrides.get('paths', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser)
        options.__dict__.update(overrides)
        if 'paths' in overrides:
            self.paths = overrides['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            if options.quiet:
                options.reporter = BaseReport
            else:
                options.reporter = StandardReport

        options.select = tuple(options.select or ())
        nothing_chosen = not (options.select or options.ignore or
                              options.testsuite or options.doctest)
        if nothing_chosen and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        elif options.select:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',)
        else:
            options.ignore = tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Create the report object, store it on the options, return it."""
        report_class = reporter or self.options.reporter
        report = report_class(self.options)
        self.options.report = report
        return report

    def check_files(self, paths=None):
        """Check every path (default: ``self.paths``); return the report."""
        targets = self.paths if paths is None else paths
        report = self.options.report
        check_one = self.runner
        report.start()
        try:
            for target in targets:
                if os.path.isdir(target):
                    self.input_dir(target)
                elif not self.excluded(target):
                    check_one(target)
        except KeyboardInterrupt:
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Check a single Python source file and return its result."""
        if self.options.verbose:
            print('checking %s' % filename)
        checker = self.checker_class(
            filename, lines=lines, options=self.options)
        return checker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check the matching files below a directory, recursively."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        check_one = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Removing entries from ``dirs`` in place keeps os.walk out
            # of the excluded subdirectories.
            rejected = [subdir for subdir in sorted(dirs)
                        if self.excluded(subdir, root)]
            for subdir in rejected:
                dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if not filename_match(filename, filepatterns):
                    continue
                if self.excluded(filename, root):
                    continue
                check_one(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Tell whether 'options.exclude' has a pattern matching the file.

        The base name is tried first, then the absolute path (joined
        with *parent* when one is given).
        """
        patterns = self.options.exclude
        if not patterns:
            return False
        if filename_match(os.path.basename(filename), patterns):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        return filename_match(os.path.abspath(filename), patterns)

    def ignore_code(self, code):
        """Tell whether the error code should be ignored.

        A code is ignored when 'options.ignore' contains a prefix of it
        and 'options.select' does not.  A partial code (shorter than four
        characters) is never ignored when it is the prefix of a selected
        entry.
        """
        selected = self.options.select
        if len(code) < 4:
            for entry in selected:
                if entry.startswith(code):
                    return False
        if not code.startswith(self.options.ignore):
            return False
        return not code.startswith(selected)

    def get_checks(self, argument_name):
        """Return the sorted, enabled checks registered for a category.

        The candidates come from ``_checks[argument_name]``.  A check is
        kept when at least one of its codes is empty or not ignored.
        Each entry is a ``(name, function, args)`` tuple.
        """
        enabled = []
        for check, (codes, args) in _checks[argument_name].items():
            for code in codes:
                if not code or not self.ignore_code(code):
                    enabled.append((check.__name__, check, args))
                    break
        enabled.sort()
        return enabled
1842
1843
def get_parser(prog='pep8', version=__version__):
    """Create the option parser with all the command-line options.

    The returned parser also carries a ``config_options`` list naming the
    options that read_config() accepts from a configuration file.
    """
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']

    # (option strings, keyword arguments), in the order they are added.
    general_options = [
        (('-v', '--verbose'),
         dict(default=0, action='count',
              help="print status messages, or debug with -vv")),
        (('-q', '--quiet'),
         dict(default=0, action='count',
              help="report only file names, or nothing with -qq")),
        (('-r', '--repeat'),
         dict(default=True, action='store_true',
              help="(obsolete) show all occurrences of the same error")),
        (('--first',),
         dict(action='store_false', dest='repeat',
              help="show first occurrence of each error")),
        (('--exclude',),
         dict(metavar='patterns', default=DEFAULT_EXCLUDE,
              help="exclude files or directories which match these "
                   "comma separated patterns (default: %default)")),
        (('--filename',),
         dict(metavar='patterns', default='*.py',
              help="when parsing directories, only check filenames "
                   "matching these comma separated patterns "
                   "(default: %default)")),
        (('--select',),
         dict(metavar='errors', default='',
              help="select errors and warnings (e.g. E,W6)")),
        (('--ignore',),
         dict(metavar='errors', default='',
              help="skip errors and warnings (e.g. E4,W)")),
        (('--show-source',),
         dict(action='store_true',
              help="show source code for each error")),
        (('--show-pep8',),
         dict(action='store_true',
              help="show text of PEP 8 for each error "
                   "(implies --first)")),
        (('--statistics',),
         dict(action='store_true',
              help="count errors and warnings")),
        (('--count',),
         dict(action='store_true',
              help="print total number of errors and warnings "
                   "to standard error and set exit code to 1 if "
                   "total is not null")),
        (('--max-line-length',),
         dict(type='int', metavar='n', default=MAX_LINE_LENGTH,
              help="set maximum allowed line length "
                   "(default: %default)")),
        (('--hang-closing',),
         dict(action='store_true',
              help="hang closing bracket instead of matching "
                   "indentation of opening bracket's line")),
        (('--format',),
         dict(metavar='format', default='default',
              help="set the error format [default|pylint|<custom>]")),
        (('--diff',),
         dict(action='store_true',
              help="report only lines changed according to the "
                   "unified diff received on STDIN")),
    ]
    for flags, settings in general_options:
        parser.add_option(*flags, **settings)

    testing = parser.add_option_group("Testing Options")
    # The test-suite options are only offered when TESTSUITE_PATH exists.
    if os.path.exists(TESTSUITE_PATH):
        testing.add_option('--testsuite', metavar='dir',
                           help="run regression tests from dir")
        testing.add_option('--doctest', action='store_true',
                           help="run doctest on myself")
    testing.add_option('--benchmark', action='store_true',
                       help="measure processing speed")
    return parser
1902
1903
def read_config(options, args, arglist, parser):
    """Read both user configuration and local configuration.

    The user file named by ``options.config`` is read first, if it
    exists.  Then the directories from the common prefix of ``args``
    upwards are searched, and the first one where a PROJECT_CONFIG file
    can be read provides the local configuration.

    When the configuration has a section named after ``parser.prog``, its
    options are applied on top of the parser defaults, and ``arglist`` is
    parsed again on top of that so the command line wins.  Options not
    listed in ``parser.config_options`` are reported and skipped.

    Values are converted according to the option's type or action:
    'int' and 'count' are read as integers, 'store_true' and
    'store_false' as booleans, and every other type is kept as text.
    The 'exclude' value is also passed through normalize_paths(),
    relative to the directory of the local configuration (or the current
    directory when none was found).

    Returns the resulting options, with ``doctest`` and ``testsuite``
    forced to False.
    """
    config = RawConfigParser()

    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    local_dir = os.curdir
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        (parent, tail) = os.path.split(parent)

    pep8_section = parser.prog
    if config.has_section(pep8_section):
        # Map each option destination to its type, or to its action for
        # options that have no type.
        option_list = dict([(o.dest, o.type or o.action)
                            for o in parser.option_list])

        # First, read the default values
        (new_options, __) = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pep8_section):
            if opt.replace('_', '-') not in parser.config_options:
                print("  unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print("  %s = %s" % (opt, config.get(pep8_section, opt)))
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pep8_section, opt)
            elif opt_type in ('store_true', 'store_false'):
                value = config.getboolean(pep8_section, opt)
            else:
                # Strings and any other option type (e.g. 'choice' from a
                # custom parser) are kept as text, instead of failing an
                # assertion that is stripped under -O anyway.
                value = config.get(pep8_section, opt)
                if normalized_opt == 'exclude':
                    value = normalize_paths(value, local_dir)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        (options, __) = parser.parse_args(arglist, values=new_options)
    options.doctest = options.testsuite = False
    return options
1956
1957
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Parse the options from *arglist* or from the command line.

    A parser is created with get_parser() when none is given, and the
    '--config' option is added to it if it is missing.  Returns the
    ``(options, args)`` pair.
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        if config_file is True:
            config_file = DEFAULT_CONFIG
        description = (
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options)))
        config_group = parser.add_option_group("Configuration",
                                               description=description)
        config_group.add_option(
            '--config', metavar='path', default=config_file,
            help="user config file location (default: %default)")
    # Don't read the command line if the module is used as a library.
    if not (arglist or parse_argv):
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    (options, args) = parser.parse_args(arglist)
    options.reporter = None

    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            # Without explicit input, fall back to the current directory
            # for a diff run or when a project configuration file exists.
            use_current_dir = options.diff or any(
                os.path.exists(name) for name in PROJECT_CONFIG)
            if use_current_dir:
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
        options.reporter = parse_argv and options.quiet == 1 and FileReport

    # Split the comma separated values; empty values are left untouched.
    if options.filename:
        options.filename = options.filename.split(',')
    options.exclude = normalize_paths(options.exclude)
    if options.select:
        options.select = options.select.split(',')
    if options.ignore:
        options.ignore = options.ignore.split(',')

    if options.diff:
        options.reporter = DiffReport
        diff_text = stdin_get_value()
        options.selected_lines = parse_udiff(
            diff_text, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
2005
2006
def _main():
    """Command-line entry point: parse the options and run the checks.

    Exits with status 1 when the report contains errors.
    """
    import signal

    def exit_on_broken_pipe(signum, frame):
        sys.exit(1)

    # Handle "Broken pipe" gracefully
    try:
        signal.signal(signal.SIGPIPE, exit_on_broken_pipe)
    except AttributeError:
        pass    # not supported on Windows

    pep8style = StyleGuide(parse_argv=True, config_file=True)
    options = pep8style.options
    if options.doctest or options.testsuite:
        from testsuite.support import run_tests
        report = run_tests(pep8style)
    else:
        report = pep8style.check_files()

    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()
    if options.testsuite and not options.quiet:
        report.print_results()

    if not report.total_errors:
        return
    if options.count:
        sys.stderr.write('%s\n' % report.total_errors)
    sys.exit(1)
2034
# Run the command-line checker only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    _main()