git.samba.org - third_party/pep8/blob - pep8.py

   1 #!/usr/bin/env python
   2 # pep8.py - Check Python source code formatting, according to PEP 8
   3 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
   4 # Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
   5 # Copyright (C) 2014 Ian Lee <ianlee1521@gmail.com>
   6 #
   7 # Permission is hereby granted, free of charge, to any person
   8 # obtaining a copy of this software and associated documentation files
   9 # (the "Software"), to deal in the Software without restriction,
  10 # including without limitation the rights to use, copy, modify, merge,
  11 # publish, distribute, sublicense, and/or sell copies of the Software,
  12 # and to permit persons to whom the Software is furnished to do so,
  13 # subject to the following conditions:
  14 #
  15 # The above copyright notice and this permission notice shall be
  16 # included in all copies or substantial portions of the Software.
  17 #
  18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  22 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  23 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  24 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  25 # SOFTWARE.
  26
  27 r"""
  28 Check Python source code formatting, according to PEP 8.
  29
  30 For usage and a list of options, try this:
  31 $ python pep8.py -h
  32
  33 This program and its regression test suite live here:
  34 http://github.com/jcrocholl/pep8
  35
  36 Groups of errors and warnings:
  37 E errors
  38 W warnings
  39 100 indentation
  40 200 whitespace
  41 300 blank lines
  42 400 imports
  43 500 line length
  44 600 deprecation
  45 700 statements
  46 900 syntax error
  47 """
  48 from __future__ import with_statement
  49
  50 __version__ = '1.6.0a0'
  51
  52 import os
  53 import sys
  54 import re
  55 import time
  56 import inspect
  57 import keyword
  58 import tokenize
  59 from optparse import OptionParser
  60 from fnmatch import fnmatch
  61 try:
  62     from configparser import RawConfigParser
  63     from io import TextIOWrapper
  64 except ImportError:
  65     from ConfigParser import RawConfigParser
  66
# Comma-separated default values for the exclude and ignore settings.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
DEFAULT_IGNORE = 'E123,E226,E24,E704'
# Location of the per-user configuration file: "~\.pep8" on win32,
# otherwise "pep8" under $XDG_CONFIG_HOME (falling back to ~/.config).
try:
    if sys.platform == 'win32':
        DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
    else:
        DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                      os.path.expanduser('~/.config'), 'pep8')
except ImportError:
    # NOTE(review): presumably guards platforms where expanduser() cannot
    # import what it needs -- confirm before removing.
    DEFAULT_CONFIG = None

# File names of per-project configuration files.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
# The "testsuite" directory sitting next to this module.
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
# %-style templates for a reported error, keyed by format name.
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

# NOTE(review): looks like the compile() flag of the same name -- confirm.
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# Every Python keyword plus 'print', minus the singleton names above.
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
# Operator classes consulted by missing_whitespace_around_operator().
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
# The two characters treated as horizontal whitespace: space and tab.
WHITESPACE = frozenset(' \t')
# Token types grouped for quick membership tests.
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

# Leading run of spaces/tabs of a line (always matches, possibly empty).
INDENT_REGEX = re.compile(r'([ \t]*)')
# "raise Name," and "raise Name, ..., name" at the end of the text.
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
# One upper-case letter followed by exactly three digits.
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
# Optional u/r prefix followed by a quote character.
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
# An opening bracket followed by a space, or a space followed by a
# closing bracket, comma, semicolon or colon.
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
# A comma, semicolon or colon followed by two spaces or a tab.
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
# "==" / "!=" with None, False or True on either side.
COMPARE_SINGLETON_REGEX = re.compile(r'\b(None|False|True)?\s*([=!]=)'
                                     r'\s*(?(1)|(None|False|True))\b')
# "not <operand> in" / "not <operand> is".
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s')
# A comparison against "types.XxxType" or against "type(...)".
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
# Any keyword, capturing the whitespace before (1) and after (2) it.
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
# A run of operator characters, capturing the whitespace before (1) and
# after (2) it; the preceding character must not be a comma or whitespace.
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
# A "@@ -a,b +c,d @@" hunk header; groups are the "+" start line and the
# optional "+" line count.
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')

# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
# True when the first token produced for "#\n" includes the newline.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
 122
 123
 124 ##############################################################################
 125 # Plugins (check functions) for physical lines
 126 ##############################################################################
 127
 128
 129 def tabs_or_spaces(physical_line, indent_char):
 130     r"""Never mix tabs and spaces.
 131
 132     The most popular way of indenting Python is with spaces only.  The
 133     second-most popular way is with tabs only.  Code indented with a mixture
 134     of tabs and spaces should be converted to using spaces exclusively.  When
 135     invoking the Python command line interpreter with the -t option, it issues
 136     warnings about code that illegally mixes tabs and spaces.  When using -tt
 137     these warnings become errors.  These options are highly recommended!
 138
 139     Okay: if a == 0:\n        a = 1\n        b = 1
 140     E101: if a == 0:\n        a = 1\n\tb = 1
 141     """
 142     indent = INDENT_REGEX.match(physical_line).group(1)
 143     for offset, char in enumerate(indent):
 144         if char != indent_char:
 145             return offset, "E101 indentation contains mixed spaces and tabs"
 146
 147
 148 def tabs_obsolete(physical_line):
 149     r"""For new projects, spaces-only are strongly recommended over tabs.
 150
 151     Okay: if True:\n    return
 152     W191: if True:\n\treturn
 153     """
 154     indent = INDENT_REGEX.match(physical_line).group(1)
 155     if '\t' in indent:
 156         return indent.index('\t'), "W191 indentation contains tabs"
 157
 158
 159 def trailing_whitespace(physical_line):
 160     r"""Trailing whitespace is superfluous.
 161
 162     The warning returned varies on whether the line itself is blank, for easier
 163     filtering for those who want to indent their blank lines.
 164
 165     Okay: spam(1)\n#
 166     W291: spam(1) \n#
 167     W293: class Foo(object):\n    \n    bang = 12
 168     """
 169     physical_line = physical_line.rstrip('\n')    # chr(10), newline
 170     physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
 171     physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
 172     stripped = physical_line.rstrip(' \t\v')
 173     if physical_line != stripped:
 174         if stripped:
 175             return len(stripped), "W291 trailing whitespace"
 176         else:
 177             return 0, "W293 blank line contains whitespace"
 178
 179
 180 def trailing_blank_lines(physical_line, lines, line_number, total_lines):
 181     r"""Trailing blank lines are superfluous.
 182
 183     Okay: spam(1)
 184     W391: spam(1)\n
 185
 186     However the last line should end with a new line (warning W292).
 187     """
 188     if line_number == total_lines:
 189         stripped_last_line = physical_line.rstrip()
 190         if not stripped_last_line:
 191             return 0, "W391 blank line at end of file"
 192         if stripped_last_line == physical_line:
 193             return len(physical_line), "W292 no newline at end of file"
 194
 195
 196 def maximum_line_length(physical_line, max_line_length, multiline):
 197     r"""Limit all lines to a maximum of 79 characters.
 198
 199     There are still many devices around that are limited to 80 character
 200     lines; plus, limiting windows to 80 characters makes it possible to have
 201     several windows side-by-side.  The default wrapping on such devices looks
 202     ugly.  Therefore, please limit all lines to a maximum of 79 characters.
 203     For flowing long blocks of text (docstrings or comments), limiting the
 204     length to 72 characters is recommended.
 205
 206     Reports error E501.
 207     """
 208     line = physical_line.rstrip()
 209     length = len(line)
 210     if length > max_line_length and not noqa(line):
 211         # Special case for long URLs in multi-line docstrings or comments,
 212         # but still report the error when the 72 first chars are whitespaces.
 213         chunks = line.split()
 214         if ((len(chunks) == 1 and multiline) or
 215             (len(chunks) == 2 and chunks[0] == '#')) and \
 216                 len(line) - len(chunks[-1]) < max_line_length - 7:
 217             return
 218         if hasattr(line, 'decode'):   # Python 2
 219             # The line could contain multi-byte characters
 220             try:
 221                 length = len(line.decode('utf-8'))
 222             except UnicodeError:
 223                 pass
 224         if length > max_line_length:
 225             return (max_line_length, "E501 line too long "
 226                     "(%d > %d characters)" % (length, max_line_length))
 227
 228
 229 ##############################################################################
 230 # Plugins (check functions) for logical lines
 231 ##############################################################################
 232
 233
 234 def blank_lines(logical_line, blank_lines, indent_level, line_number,
 235                 blank_before, previous_logical, previous_indent_level):
 236     r"""Separate top-level function and class definitions with two blank lines.
 237
 238     Method definitions inside a class are separated by a single blank line.
 239
 240     Extra blank lines may be used (sparingly) to separate groups of related
 241     functions.  Blank lines may be omitted between a bunch of related
 242     one-liners (e.g. a set of dummy implementations).
 243
 244     Use blank lines in functions, sparingly, to indicate logical sections.
 245
 246     Okay: def a():\n    pass\n\n\ndef b():\n    pass
 247     Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
 248
 249     E301: class Foo:\n    b = 0\n    def bar():\n        pass
 250     E302: def a():\n    pass\n\ndef b(n):\n    pass
 251     E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
 252     E303: def a():\n\n\n\n    pass
 253     E304: @decorator\n\ndef a():\n    pass
 254     """
 255     if line_number < 3 and not previous_logical:
 256         return  # Don't expect blank lines before the first line
 257     if previous_logical.startswith('@'):
 258         if blank_lines:
 259             yield 0, "E304 blank lines found after function decorator"
 260     elif blank_lines > 2 or (indent_level and blank_lines == 2):
 261         yield 0, "E303 too many blank lines (%d)" % blank_lines
 262     elif logical_line.startswith(('def ', 'class ', '@')):
 263         if indent_level:
 264             if not (blank_before or previous_indent_level < indent_level or
 265                     DOCSTRING_REGEX.match(previous_logical)):
 266                 yield 0, "E301 expected 1 blank line, found 0"
 267         elif blank_before != 2:
 268             yield 0, "E302 expected 2 blank lines, found %d" % blank_before
 269
 270
 271 def extraneous_whitespace(logical_line):
 272     r"""Avoid extraneous whitespace.
 273
 274     Avoid extraneous whitespace in these situations:
 275     - Immediately inside parentheses, brackets or braces.
 276     - Immediately before a comma, semicolon, or colon.
 277
 278     Okay: spam(ham[1], {eggs: 2})
 279     E201: spam( ham[1], {eggs: 2})
 280     E201: spam(ham[ 1], {eggs: 2})
 281     E201: spam(ham[1], { eggs: 2})
 282     E202: spam(ham[1], {eggs: 2} )
 283     E202: spam(ham[1 ], {eggs: 2})
 284     E202: spam(ham[1], {eggs: 2 })
 285
 286     E203: if x == 4: print x, y; x, y = y , x
 287     E203: if x == 4: print x, y ; x, y = y, x
 288     E203: if x == 4 : print x, y; x, y = y, x
 289     """
 290     line = logical_line
 291     for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
 292         text = match.group()
 293         char = text.strip()
 294         found = match.start()
 295         if text == char + ' ':
 296             # assert char in '([{'
 297             yield found + 1, "E201 whitespace after '%s'" % char
 298         elif line[found - 1] != ',':
 299             code = ('E202' if char in '}])' else 'E203')  # if char in ',;:'
 300             yield found, "%s whitespace before '%s'" % (code, char)
 301
 302
 303 def whitespace_around_keywords(logical_line):
 304     r"""Avoid extraneous whitespace around keywords.
 305
 306     Okay: True and False
 307     E271: True and  False
 308     E272: True  and False
 309     E273: True and\tFalse
 310     E274: True\tand False
 311     """
 312     for match in KEYWORD_REGEX.finditer(logical_line):
 313         before, after = match.groups()
 314
 315         if '\t' in before:
 316             yield match.start(1), "E274 tab before keyword"
 317         elif len(before) > 1:
 318             yield match.start(1), "E272 multiple spaces before keyword"
 319
 320         if '\t' in after:
 321             yield match.start(2), "E273 tab after keyword"
 322         elif len(after) > 1:
 323             yield match.start(2), "E271 multiple spaces after keyword"
 324
 325
 326 def missing_whitespace(logical_line):
 327     r"""Each comma, semicolon or colon should be followed by whitespace.
 328
 329     Okay: [a, b]
 330     Okay: (3,)
 331     Okay: a[1:4]
 332     Okay: a[:4]
 333     Okay: a[1:]
 334     Okay: a[1:4:2]
 335     E231: ['a','b']
 336     E231: foo(bar,baz)
 337     E231: [{'a':'b'}]
 338     """
 339     line = logical_line
 340     for index in range(len(line) - 1):
 341         char = line[index]
 342         if char in ',;:' and line[index + 1] not in WHITESPACE:
 343             before = line[:index]
 344             if char == ':' and before.count('[') > before.count(']') and \
 345                     before.rfind('{') < before.rfind('['):
 346                 continue  # Slice syntax, no space required
 347             if char == ',' and line[index + 1] == ')':
 348                 continue  # Allow tuple with only one element: (3,)
 349             yield index, "E231 missing whitespace after '%s'" % char
 350
 351
 352 def indentation(logical_line, previous_logical, indent_char,
 353                 indent_level, previous_indent_level):
 354     r"""Use 4 spaces per indentation level.
 355
 356     For really old code that you don't want to mess up, you can continue to
 357     use 8-space tabs.
 358
 359     Okay: a = 1
 360     Okay: if a == 0:\n    a = 1
 361     E111:   a = 1
 362     E114:   # a = 1
 363
 364     Okay: for item in items:\n    pass
 365     E112: for item in items:\npass
 366     E115: for item in items:\n# Hi\n    pass
 367
 368     Okay: a = 1\nb = 2
 369     E113: a = 1\n    b = 2
 370     E116: a = 1\n    # b = 2
 371     """
 372     c = 0 if logical_line else 3
 373     tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
 374     if indent_level % 4:
 375         yield 0, tmpl % (1 + c, "indentation is not a multiple of four")
 376     indent_expect = previous_logical.endswith(':')
 377     if indent_expect and indent_level <= previous_indent_level:
 378         yield 0, tmpl % (2 + c, "expected an indented block")
 379     elif not indent_expect and indent_level > previous_indent_level:
 380         yield 0, tmpl % (3 + c, "unexpected indentation")
 381
 382
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    The tokens argument is a sequence of (type, text, start, end, line)
    tuples covering the logical line.  Nothing is checked when noqa is
    true or when the logical line spans a single physical row.  With a
    verbose level of 3 or more the physical lines are printed; with 4 or
    more the bracket tracking is printed as well.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    # Row of the first token, and number of physical rows spanned by the
    # token start positions.
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    # row is relative to first_row; depth is the current bracket nesting.
    row = depth = 0
    # a hang of 8 is also accepted when the file is indented with tabs
    valid_hangs = (4,) if indent_char != '\t' else (4, 8)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents
    # (maps a column to True, to str, or to the text of a token seen there)
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    for token_type, text, start, end, line in tokens:

        # A token starting on a later row than the current one begins a new
        # physical line.  last_token_multiline is only read in that case,
        # i.e. never for the first token, and it is assigned at the bottom
        # of every iteration.
        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            # (the most recent opening rows are tried first)
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            # once a hang is recorded for this depth, later lines must
            # repeat that same hang
            if hangs[depth]:
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and rel_indent[row] == 8):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    # remember this hang so following lines are compared
                    # against it
                    hangs[depth] = hang
                    if hang > 4:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        # (the first significant token after a bracket opened on this row
        # fixes the visual indent column for the current depth)
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        # a colon ending its physical line makes this row count as an
        # opening row for the current depth
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                # forget alignment columns at or beyond the closed bracket's
                # indent
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # credit the close to the nearest row at or above this one
                # that still has an open bracket counted
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow to line up tokens
                indent_chances[start[1]] = text

        # a token spanning several rows passes its relative indent on to
        # the row where it ends
        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    # "line" and "start" still refer to the last token at this point: when
    # the statement opens a block and its last physical line is indented by
    # 4, it cannot be told apart from the block body.
    if indent_next and expand_indent(line) == indent_level + 4:
        pos = (start[0], indent[0] + 4)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
 578
 579
 580 def whitespace_before_parameters(logical_line, tokens):
 581     r"""Avoid extraneous whitespace.
 582
 583     Avoid extraneous whitespace in the following situations:
 584     - before the open parenthesis that starts the argument list of a
 585       function call.
 586     - before the open parenthesis that starts an indexing or slicing.
 587
 588     Okay: spam(1)
 589     E211: spam (1)
 590
 591     Okay: dict['key'] = list[index]
 592     E211: dict ['key'] = list[index]
 593     E211: dict['key'] = list [index]
 594     """
 595     prev_type, prev_text, __, prev_end, __ = tokens[0]
 596     for index in range(1, len(tokens)):
 597         token_type, text, start, end, __ = tokens[index]
 598         if (token_type == tokenize.OP and
 599             text in '([' and
 600             start != prev_end and
 601             (prev_type == tokenize.NAME or prev_text in '}])') and
 602             # Syntax "class A (B):" is allowed, but avoid it
 603             (index < 2 or tokens[index - 2][1] != 'class') and
 604                 # Allow "return (a.foo for a in range(5))"
 605                 not keyword.iskeyword(prev_text)):
 606             yield prev_end, "E211 whitespace before '%s'" % text
 607         prev_type = token_type
 608         prev_text = text
 609         prev_end = end
 610
 611
 612 def whitespace_around_operator(logical_line):
 613     r"""Avoid extraneous whitespace around an operator.
 614
 615     Okay: a = 12 + 3
 616     E221: a = 4  + 5
 617     E222: a = 4 +  5
 618     E223: a = 4\t+ 5
 619     E224: a = 4 +\t5
 620     """
 621     for match in OPERATOR_REGEX.finditer(logical_line):
 622         before, after = match.groups()
 623
 624         if '\t' in before:
 625             yield match.start(1), "E223 tab before operator"
 626         elif len(before) > 1:
 627             yield match.start(1), "E221 multiple spaces before operator"
 628
 629         if '\t' in after:
 630             yield match.start(2), "E224 tab after operator"
 631         elif len(after) > 1:
 632             yield match.start(2), "E222 multiple spaces after operator"
 633
 634
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    The tokens argument is a sequence of (type, text, start, end, line)
    tuples; each reported position is the end of the token that comes
    before the missing space.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # Nesting counter: raised by '(' and by 'lambda', lowered by ')'.
    parens = 0
    # State carried from an operator to the token that follows it:
    #   False      - nothing pending
    #   True       - the operator must be followed by a space
    #   (pos, had) - space is optional; "pos" is the end of the token before
    #                the operator and "had" tells whether a gap preceded it
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in SKIP_COMMENTS:
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            # The previous token was an operator; inspect the gap after it.
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    # space after the operator but none before it
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                else:
                    # No space on either side of an optional-space operator:
                    # pick the code from the kind of operator.
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            # An operator that is not the first significant token: decide
            # what kind of spacing it calls for.
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is being used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if (prev_text in '}])' if prev_type == tokenize.OP
                        else prev_text not in KEYWORDS):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
 723         prev_text = text
 724         prev_end = end
 725
 726
 727 def whitespace_around_comma(logical_line):
 728     r"""Avoid extraneous whitespace after a comma or a colon.
 729
 730     Note: these checks are disabled by default
 731
 732     Okay: a = (1, 2)
 733     E241: a = (1,  2)
 734     E242: a = (1,\t2)
 735     """
 736     line = logical_line
 737     for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
 738         found = m.start() + 1
 739         if '\t' in m.group():
 740             yield found, "E242 tab after '%s'" % m.group()[0]
 741         else:
 742             yield found, "E241 multiple spaces after '%s'" % m.group()[0]
 743
 744
 745 def whitespace_around_named_parameter_equals(logical_line, tokens):
 746     r"""Don't use spaces around the '=' sign in function arguments.
 747
 748     Don't use spaces around the '=' sign when used to indicate a
 749     keyword argument or a default parameter value.
 750
 751     Okay: def complex(real, imag=0.0):
 752     Okay: return magic(r=real, i=imag)
 753     Okay: boolean(a == b)
 754     Okay: boolean(a != b)
 755     Okay: boolean(a <= b)
 756     Okay: boolean(a >= b)
 757
 758     E251: def complex(real, imag = 0.0):
 759     E251: return magic(r = real, i = imag)
 760     """
 761     parens = 0
 762     no_space = False
 763     prev_end = None
 764     message = "E251 unexpected spaces around keyword / parameter equals"
 765     for token_type, text, start, end, line in tokens:
 766         if token_type == tokenize.NL:
 767             continue
 768         if no_space:
 769             no_space = False
 770             if start != prev_end:
 771                 yield (prev_end, message)
 772         if token_type == tokenize.OP:
 773             if text == '(':
 774                 parens += 1
 775             elif text == ')':
 776                 parens -= 1
 777             elif parens and text == '=':
 778                 no_space = True
 779                 if start != prev_end:
 780                     yield (prev_end, message)
 781         prev_end = end
 782
 783
 784 def whitespace_before_comment(logical_line, tokens):
 785     r"""Separate inline comments by at least two spaces.
 786
 787     An inline comment is a comment on the same line as a statement.  Inline
 788     comments should be separated by at least two spaces from the statement.
 789     They should start with a # and a single space.
 790
 791     Each line of a block comment starts with a # and a single space
 792     (unless it is indented text inside the comment).
 793
 794     Okay: x = x + 1  # Increment x
 795     Okay: x = x + 1    # Increment x
 796     Okay: # Block comment
 797     E261: x = x + 1 # Increment x
 798     E262: x = x + 1  #Increment x
 799     E262: x = x + 1  #  Increment x
 800     E265: #Block comment
 801     E266: ### Block comment
 802     """
 803     prev_end = (0, 0)
 804     for token_type, text, start, end, line in tokens:
 805         if token_type == tokenize.COMMENT:
 806             inline_comment = line[:start[1]].strip()
 807             if inline_comment:
 808                 if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
 809                     yield (prev_end,
 810                            "E261 at least two spaces before inline comment")
 811             symbol, sp, comment = text.partition(' ')
 812             bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
 813             if inline_comment:
 814                 if bad_prefix or comment[:1] in WHITESPACE:
 815                     yield start, "E262 inline comment should start with '# '"
 816             elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
 817                 if bad_prefix != '#':
 818                     yield start, "E265 block comment should start with '# '"
 819                 elif comment:
 820                     yield start, "E266 too many leading '#' for block comment"
 821         elif token_type != tokenize.NL:
 822             prev_end = end
 823
 824
 825 def imports_on_separate_lines(logical_line):
 826     r"""Imports should usually be on separate lines.
 827
 828     Okay: import os\nimport sys
 829     E401: import sys, os
 830
 831     Okay: from subprocess import Popen, PIPE
 832     Okay: from myclas import MyClass
 833     Okay: from foo.bar.yourclass import YourClass
 834     Okay: import myclass
 835     Okay: import foo.bar.yourclass
 836     """
 837     line = logical_line
 838     if line.startswith('import '):
 839         found = line.find(',')
 840         if -1 < found and ';' not in line[:found]:
 841             yield found, "E401 multiple imports on one line"
 842
 843
 844 def module_imports_on_top_of_file(
 845         logical_line, indent_level, checker_state, noqa):
 846     r"""Imports are always put at the top of the file, just after any module
 847     comments and docstrings, and before module globals and constants.
 848
 849     Okay: import os
 850     Okay: # this is a comment\nimport os
 851     Okay: '''this is a module docstring'''\nimport os
 852     Okay: r'''this is a module docstring'''\nimport os
 853     Okay: __version__ = "123"\nimport os
 854     E402: a=1\nimport os
 855     E402: 'One string'\n"Two string"\nimport os
 856     E402: a=1\nfrom sys import x
 857
 858     Okay: if x:\n    import os
 859     """
 860     def is_string_literal(line):
 861         if line[0] in 'uUbB':
 862             line = line[1:]
 863         if line and line[0] in 'rR':
 864             line = line[1:]
 865         return line and (line[0] == '"' or line[0] == "'")
 866
 867     if indent_level:  # Allow imports in conditional statements or functions
 868         return
 869     if not logical_line:  # Allow empty lines or comments
 870         return
 871     if noqa:
 872         return
 873     line = logical_line
 874     if line.startswith('import ') or line.startswith('from '):
 875         if checker_state.get('seen_non_imports', False):
 876             yield 0, "E402 import not at top of file"
 877     elif line.startswith('__version__ '):
 878         # These lines should be included after the module's docstring, before
 879         # any other code, separated by a blank line above and below.
 880         return
 881     elif is_string_literal(line):
 882         # The first literal is a docstring, allow it. Otherwise, report error.
 883         if checker_state.get('seen_docstring', False):
 884             checker_state['seen_non_imports'] = True
 885         else:
 886             checker_state['seen_docstring'] = True
 887     else:
 888         checker_state['seen_non_imports'] = True
 889
 890
 891 def compound_statements(logical_line):
 892     r"""Compound statements (on the same line) are generally discouraged.
 893
 894     While sometimes it's okay to put an if/for/while with a small body
 895     on the same line, never do this for multi-clause statements.
 896     Also avoid folding such long lines!
 897
 898     Always use a def statement instead of an assignment statement that
 899     binds a lambda expression directly to a name.
 900
 901     Okay: if foo == 'blah':\n    do_blah_thing()
 902     Okay: do_one()
 903     Okay: do_two()
 904     Okay: do_three()
 905
 906     E701: if foo == 'blah': do_blah_thing()
 907     E701: for x in lst: total += x
 908     E701: while t < 10: t = delay()
 909     E701: if foo == 'blah': do_blah_thing()
 910     E701: else: do_non_blah_thing()
 911     E701: try: something()
 912     E701: finally: cleanup()
 913     E701: if foo == 'blah': one(); two(); three()
 914     E702: do_one(); do_two(); do_three()
 915     E703: do_four();  # useless semicolon
 916     E704: def f(x): return 2*x
 917     E731: f = lambda x: 2*x
 918     """
 919     line = logical_line
 920     last_char = len(line) - 1
 921     found = line.find(':')
 922     while -1 < found < last_char:
 923         before = line[:found]
 924         if ((before.count('{') <= before.count('}') and   # {'a': 1} (dict)
 925              before.count('[') <= before.count(']') and   # [1:2] (slice)
 926              before.count('(') <= before.count(')'))):    # (annotation)
 927             lambda_kw = LAMBDA_REGEX.search(before)
 928             if lambda_kw:
 929                 before = line[:lambda_kw.start()].rstrip()
 930                 if before[-1:] == '=' and isidentifier(before[:-1].strip()):
 931                     yield 0, ("E731 do not assign a lambda expression, use a "
 932                               "def")
 933                 break
 934             if before.startswith('def '):
 935                 yield 0, "E704 multiple statements on one line (def)"
 936             else:
 937                 yield found, "E701 multiple statements on one line (colon)"
 938         found = line.find(':', found + 1)
 939     found = line.find(';')
 940     while -1 < found:
 941         if found < last_char:
 942             yield found, "E702 multiple statements on one line (semicolon)"
 943         else:
 944             yield found, "E703 statement ends with a semicolon"
 945         found = line.find(';', found + 1)
 946
 947
 948 def explicit_line_join(logical_line, tokens):
 949     r"""Avoid explicit line join between brackets.
 950
 951     The preferred way of wrapping long lines is by using Python's implied line
 952     continuation inside parentheses, brackets and braces.  Long lines can be
 953     broken over multiple lines by wrapping expressions in parentheses.  These
 954     should be used in preference to using a backslash for line continuation.
 955
 956     E502: aaa = [123, \\n       123]
 957     E502: aaa = ("bbb " \\n       "ccc")
 958
 959     Okay: aaa = [123,\n       123]
 960     Okay: aaa = ("bbb "\n       "ccc")
 961     Okay: aaa = "bbb " \\n    "ccc"
 962     """
 963     prev_start = prev_end = parens = 0
 964     for token_type, text, start, end, line in tokens:
 965         if start[0] != prev_start and parens and backslash:
 966             yield backslash, "E502 the backslash is redundant between brackets"
 967         if end[0] != prev_end:
 968             if line.rstrip('\r\n').endswith('\\'):
 969                 backslash = (end[0], len(line.splitlines()[-1]) - 1)
 970             else:
 971                 backslash = None
 972             prev_start = prev_end = end[0]
 973         else:
 974             prev_start = start[0]
 975         if token_type == tokenize.OP:
 976             if text in '([{':
 977                 parens += 1
 978             elif text in ')]}':
 979                 parens -= 1
 980
 981
 982 def comparison_to_singleton(logical_line, noqa):
 983     r"""Comparison to singletons should use "is" or "is not".
 984
 985     Comparisons to singletons like None should always be done
 986     with "is" or "is not", never the equality operators.
 987
 988     Okay: if arg is not None:
 989     E711: if arg != None:
 990     E711: if None == arg:
 991     E712: if arg == True:
 992     E712: if False == arg:
 993
 994     Also, beware of writing if x when you really mean if x is not None --
 995     e.g. when testing whether a variable or argument that defaults to None was
 996     set to some other value.  The other value might have a type (such as a
 997     container) that could be false in a boolean context!
 998     """
 999     match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
1000     if match:
1001         singleton = match.group(1) or match.group(3)
1002         same = (match.group(2) == '==')
1003
1004         msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
1005         if singleton in ('None',):
1006             code = 'E711'
1007         else:
1008             code = 'E712'
1009             nonzero = ((singleton == 'True' and same) or
1010                        (singleton == 'False' and not same))
1011             msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
1012         yield match.start(2), ("%s comparison to %s should be %s" %
1013                                (code, singleton, msg))
1014
1015
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    match = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if not match:
        return
    # Group 2 holds the operator; anything other than 'in' is reported
    # as an identity test.
    if match.group(2) == 'in':
        message = "E713 test for membership should be 'not in'"
    else:
        message = "E714 test for object identity should be 'is not'"
    yield match.start(1), message
1035
1036
def comparison_type(logical_line):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might be a
    unicode string too! In Python 2.3, str and unicode have a common base
    class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if not match:
        return
    operand = match.group(1)
    # Allow comparison for types which are not obvious: the captured
    # operand is a plain identifier that is not one of the singletons.
    not_obvious = (operand and isidentifier(operand) and
                   operand not in SINGLETONS)
    if not not_obvious:
        yield match.start(), "E721 do not compare types, use 'isinstance()'"
1058
1059
def python_3000_has_key(logical_line, noqa):
    r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    if noqa:
        return
    # Only the first occurrence on the logical line is reported.
    index = logical_line.find('.has_key(')
    if index >= 0:
        yield index, "W601 .has_key() is deprecated, use 'in'"
1069
1070
def python_3000_raise_comma(logical_line):
    r"""When raising an exception, use "raise ValueError('message')".

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if not match:
        return
    # Lines that also match the re-raise pattern are left alone.
    if RERAISE_COMMA_REGEX.match(logical_line):
        return
    # Report the last character of the match.
    yield match.end() - 1, "W602 deprecated form of raising exception"
1082
1083
def python_3000_not_equal(logical_line):
    r"""New code should always use != instead of <>.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    # Only the first occurrence on the logical line is reported.
    index = logical_line.find('<>')
    if index < 0:
        return
    yield index, "W603 '<>' is deprecated, use '!='"
1095
1096
def python_3000_backticks(logical_line):
    r"""Backticks are removed in Python 3: use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    # Only the first backtick on the logical line is reported.
    index = logical_line.find('`')
    if index < 0:
        return
    yield index, "W604 backticks are deprecated, use 'repr()'"
1106
1107
1108 ##############################################################################
1109 # Helper functions
1110 ##############################################################################
1111
1112
if '' == ''.encode():
    # Python 2: implicit encoding, '' and ''.encode() are the same type.
    def readlines(filename):
        """Return the lines of `filename`, read in universal-newline mode."""
        with open(filename, 'rU') as source:
            return source.readlines()
    isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3
    def readlines(filename):
        """Return the decoded lines of `filename`.

        The encoding is detected with tokenize.detect_encoding().  When
        detection or decoding fails, the file is read again as latin-1.
        """
        try:
            with open(filename, 'rb') as raw:
                # detect_encoding() consumes the first line(s); they come
                # back as bytes and are decoded separately below.
                (coding, head) = tokenize.detect_encoding(raw.readline)
                remainder = TextIOWrapper(raw, coding, line_buffering=True)
                return ([chunk.decode(coding) for chunk in head] +
                        remainder.readlines())
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as source:
                return source.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        """Return the whole of standard input, ignoring decoding errors."""
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
# Search function for a "# noqa" / "# nopep8" marker (case-insensitive).
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1139
1140
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.  A trailing line
    terminator is never counted, so a whitespace-only line reports only
    its spaces and tabs.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    >>> expand_indent('  \n')
    2
    """
    # Without this, lstrip() below would also strip the newline of a
    # whitespace-only line and count it as one column of indentation.
    line = line.rstrip('\n\r')
    if '\t' not in line:
        return len(line) - len(line.lstrip())
    result = 0
    for char in line:
        if char == '\t':
            # Jump to the next multiple of 8.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
1166
1167
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # The closing quote character tells which quote opens the literal;
    # anything before its first occurrence is a modifier (e.g. u or r).
    quote = text[-1]
    # Triple quotes take three characters on each side, plain quotes one.
    if text[-3:] in ('"""', "'''"):
        delimiter_length = 3
    else:
        delimiter_length = 1
    body_start = text.index(quote) + delimiter_length
    body_end = len(text) - delimiter_length
    masked = 'x' * (body_end - body_start)
    return ''.join((text[:body_start], masked, text[body_end:]))
1186
1187
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines.

    For each file of the diff, the key is the file name joined to
    `parent` and the value is the set of row numbers covered by the
    file's hunks.  Files without rows, or whose name does not satisfy
    filename_match() for `patterns`, are left out.
    """
    rows_by_path = {}
    current_path = None
    remaining = None    # lines of the current hunk still to be skipped
    for line in diff.splitlines():
        if remaining:
            # Inside a hunk body: removed lines do not count.
            if not line.startswith('-'):
                remaining -= 1
            continue
        if line.startswith('@@ '):
            groups = HUNK_REGEX.match(line).groups()
            # A missing group defaults to 1.
            (first_row, remaining) = [int(group or '1') for group in groups]
            rows_by_path[current_path].update(
                range(first_row, first_row + remaining))
        elif line.startswith('+++'):
            # Keep what precedes the first tab, without a leading "b/".
            current_path = line[4:].split('\t', 1)[0]
            if current_path.startswith('b/'):
                current_path = current_path[2:]
            rows_by_path[current_path] = set()

    selected = {}
    for (path, rows) in rows_by_path.items():
        if rows and filename_match(path, patterns):
            selected[os.path.join(parent, path)] = rows
    return selected
1211
1212
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Return a list of paths.  An entry containing a '/' is made absolute
    relative to `parent`; other entries are kept as written.  Trailing
    slashes are removed.  A list is returned unchanged and any other
    false value gives an empty list.
    """
    if not value:
        return []
    if isinstance(value, list):
        return value

    def _normalize(entry):
        entry = entry.strip()
        if '/' in entry:
            entry = os.path.abspath(os.path.join(parent, entry))
        return entry.rstrip('/')

    return [_normalize(entry) for entry in value.split(',')]
1229
1230
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is empty or unspecified, `default` is returned instead.
    """
    if patterns:
        for pattern in patterns:
            if fnmatch(filename, pattern):
                return True
        return False
    return default
1239
1240
if COMMENT_WITH_NL:
    def _is_eol_token(token):
        """Tell whether `token` ends a physical line.

        Besides the NEWLINE token types, a comment token whose text is
        the whole physical line also counts.
        """
        kind = token[0]
        if kind in NEWLINE:
            return True
        return kind == tokenize.COMMENT and token[1] == token[4]
else:
    def _is_eol_token(token):
        """Tell whether `token` ends a physical line."""
        kind = token[0]
        return kind in NEWLINE
1248
1249
1250 ##############################################################################
1251 # Framework to run all checks
1252 ##############################################################################
1253
1254
# Registered checks, grouped by kind.  Each entry maps the check object to
# a (codes, argument_names) tuple.
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}


def _get_parameters(function):
    """Return the list of positional parameter names of `function`."""
    # inspect.getargspec() was removed in Python 3.11; getfullargspec()
    # is its Python 3 replacement and returns the same list first.
    getspec = getattr(inspect, 'getfullargspec', None)
    if getspec is None:
        # Python 2 only provides getargspec().
        getspec = inspect.getargspec
    return getspec(function)[0]


def register_check(check, codes=None):
    """Register a new check object.

    A function is registered when its first parameter is named
    'physical_line' or 'logical_line'; that name selects the kind of
    check.  When `codes` is None, the codes are collected from the
    function's docstring.  A class is registered as a 'tree' check when
    its __init__ starts with the parameters (self, tree).  Registering
    an already known check extends its list of codes.  Anything else is
    silently ignored.
    """
    def _add_check(check, kind, codes, args):
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    if inspect.isfunction(check):
        args = _get_parameters(check)
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
1274
1275
def init_checks_registry():
    """Register all globally visible functions.

    Every function found in the module that defines register_check() is
    handed to register_check(), which keeps those whose first argument
    name is either 'physical_line' or 'logical_line'.
    """
    module = inspect.getmodule(register_check)
    members = inspect.getmembers(module, inspect.isfunction)
    for (_name, candidate) in members:
        register_check(candidate)
init_checks_registry()
1285
1286
1287 class Checker(object):
1288     """Load a Python source file, tokenize it, check coding style."""
1289
1290     def __init__(self, filename=None, lines=None,
1291                  options=None, report=None, **kwargs):
1292         if options is None:
1293             options = StyleGuide(kwargs).options
1294         else:
1295             assert not kwargs
1296         self._io_error = None
1297         self._physical_checks = options.physical_checks
1298         self._logical_checks = options.logical_checks
1299         self._ast_checks = options.ast_checks
1300         self.max_line_length = options.max_line_length
1301         self.multiline = False  # in a multiline string?
1302         self.hang_closing = options.hang_closing
1303         self.verbose = options.verbose
1304         self.filename = filename
1305         # Dictionary where a checker can store its custom state.
1306         self._checker_states = {}
1307         if filename is None:
1308             self.filename = 'stdin'
1309             self.lines = lines or []
1310         elif filename == '-':
1311             self.filename = 'stdin'
1312             self.lines = stdin_get_value().splitlines(True)
1313         elif lines is None:
1314             try:
1315                 self.lines = readlines(filename)
1316             except IOError:
1317                 (exc_type, exc) = sys.exc_info()[:2]
1318                 self._io_error = '%s: %s' % (exc_type.__name__, exc)
1319                 self.lines = []
1320         else:
1321             self.lines = lines
1322         if self.lines:
1323             ord0 = ord(self.lines[0][0])
1324             if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
1325                 if ord0 == 0xfeff:
1326                     self.lines[0] = self.lines[0][1:]
1327                 elif self.lines[0][:3] == '\xef\xbb\xbf':
1328                     self.lines[0] = self.lines[0][3:]
1329         self.report = report or options.report
1330         self.report_error = self.report.error
1331
1332     def report_invalid_syntax(self):
1333         """Check if the syntax is valid."""
1334         (exc_type, exc) = sys.exc_info()[:2]
1335         if len(exc.args) > 1:
1336             offset = exc.args[1]
1337             if len(offset) > 2:
1338                 offset = offset[1:3]
1339         else:
1340             offset = (1, 0)
1341         self.report_error(offset[0], offset[1] or 0,
1342                           'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
1343                           self.report_invalid_syntax)
1344
1345     def readline(self):
1346         """Get the next line from the input buffer."""
1347         if self.line_number >= self.total_lines:
1348             return ''
1349         line = self.lines[self.line_number]
1350         self.line_number += 1
1351         if self.indent_char is None and line[:1] in WHITESPACE:
1352             self.indent_char = line[0]
1353         return line
1354
1355     def run_check(self, check, argument_names):
1356         """Run a check plugin."""
1357         arguments = []
1358         for name in argument_names:
1359             arguments.append(getattr(self, name))
1360         return check(*arguments)
1361
1362     def init_checker_state(self, name, argument_names):
1363         """ Prepares a custom state for the specific checker plugin."""
1364         if 'checker_state' in argument_names:
1365             self.checker_state = self._checker_states.setdefault(name, {})
1366
1367     def check_physical(self, line):
1368         """Run all physical checks on a raw input line."""
1369         self.physical_line = line
1370         for name, check, argument_names in self._physical_checks:
1371             self.init_checker_state(name, argument_names)
1372             result = self.run_check(check, argument_names)
1373             if result is not None:
1374                 (offset, text) = result
1375                 self.report_error(self.line_number, offset, text, check)
1376                 if text[:4] == 'E101':
1377                     self.indent_char = line[0]
1378
1379     def build_tokens_line(self):
1380         """Build a logical line from tokens."""
1381         logical = []
1382         comments = []
1383         length = 0
1384         prev_row = prev_col = mapping = None
1385         for token_type, text, start, end, line in self.tokens:
1386             if token_type in SKIP_TOKENS:
1387                 continue
1388             if not mapping:
1389                 mapping = [(0, start)]
1390             if token_type == tokenize.COMMENT:
1391                 comments.append(text)
1392                 continue
1393             if token_type == tokenize.STRING:
1394                 text = mute_string(text)
1395             if prev_row:
1396                 (start_row, start_col) = start
1397                 if prev_row != start_row:    # different row
1398                     prev_text = self.lines[prev_row - 1][prev_col - 1]
1399                     if prev_text == ',' or (prev_text not in '{[('
1400                                             and text not in '}])'):
1401                         text = ' ' + text
1402                 elif prev_col != start_col:  # different column
1403                     text = line[prev_col:start_col] + text
1404             logical.append(text)
1405             length += len(text)
1406             mapping.append((length, end))
1407             (prev_row, prev_col) = end
1408         self.logical_line = ''.join(logical)
1409         self.noqa = comments and noqa(''.join(comments))
1410         return mapping
1411
1412     def check_logical(self):
1413         """Build a line from tokens and run all logical checks on it."""
1414         self.report.increment_logical_line()
1415         mapping = self.build_tokens_line()
1416
1417         if not mapping:
1418             return
1419
1420         (start_row, start_col) = mapping[0][1]
1421         start_line = self.lines[start_row - 1]
1422         self.indent_level = expand_indent(start_line[:start_col])
1423         if self.blank_before < self.blank_lines:
1424             self.blank_before = self.blank_lines
1425         if self.verbose >= 2:
1426             print(self.logical_line[:80].rstrip())
1427         for name, check, argument_names in self._logical_checks:
1428             if self.verbose >= 4:
1429                 print('   ' + name)
1430             self.init_checker_state(name, argument_names)
1431             for offset, text in self.run_check(check, argument_names) or ():
1432                 if not isinstance(offset, tuple):
1433                     for token_offset, pos in mapping:
1434                         if offset <= token_offset:
1435                             break
1436                     offset = (pos[0], pos[1] + offset - token_offset)
1437                 self.report_error(offset[0], offset[1], text, check)
1438         if self.logical_line:
1439             self.previous_indent_level = self.indent_level
1440             self.previous_logical = self.logical_line
1441         self.blank_lines = 0
1442         self.tokens = []
1443
1444     def check_ast(self):
1445         """Build the file's AST and run all AST checks."""
1446         try:
1447             tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
1448         except (SyntaxError, TypeError):
1449             return self.report_invalid_syntax()
1450         for name, cls, __ in self._ast_checks:
1451             checker = cls(tree, self.filename)
1452             for lineno, offset, text, check in checker.run():
1453                 if not self.lines or not noqa(self.lines[lineno - 1]):
1454                     self.report_error(lineno, offset, text, check)
1455
1456     def generate_tokens(self):
1457         """Tokenize the file, run physical line checks and yield tokens."""
1458         if self._io_error:
1459             self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
1460         tokengen = tokenize.generate_tokens(self.readline)
1461         try:
1462             for token in tokengen:
1463                 if token[2][0] > self.total_lines:
1464                     return
1465                 self.maybe_check_physical(token)
1466                 yield token
1467         except (SyntaxError, tokenize.TokenError):
1468             self.report_invalid_syntax()
1469
1470     def maybe_check_physical(self, token):
1471         """If appropriate (based on token), check current physical line(s)."""
1472         # Called after every token, but act only on end of line.
1473         if _is_eol_token(token):
1474             # Obviously, a newline token ends a single physical line.
1475             self.check_physical(token[4])
1476         elif token[0] == tokenize.STRING and '\n' in token[1]:
1477             # Less obviously, a string that contains newlines is a
1478             # multiline string, either triple-quoted or with internal
1479             # newlines backslash-escaped. Check every physical line in the
1480             # string *except* for the last one: its newline is outside of
1481             # the multiline string, so we consider it a regular physical
1482             # line, and will check it like any other physical line.
1483             #
1484             # Subtleties:
1485             # - we don't *completely* ignore the last line; if it contains
1486             #   the magical "# noqa" comment, we disable all physical
1487             #   checks for the entire multiline string
1488             # - have to wind self.line_number back because initially it
1489             #   points to the last line of the string, and we want
1490             #   check_physical() to give accurate feedback
1491             if noqa(token[4]):
1492                 return
1493             self.multiline = True
1494             self.line_number = token[2][0]
1495             for line in token[1].split('\n')[:-1]:
1496                 self.check_physical(line + '\n')
1497                 self.line_number += 1
1498             self.multiline = False
1499
1500     def check_all(self, expected=None, line_offset=0):
1501         """Run all checks on the input file."""
1502         self.report.init_file(self.filename, self.lines, expected, line_offset)
1503         self.total_lines = len(self.lines)
1504         if self._ast_checks:
1505             self.check_ast()
1506         self.line_number = 0
1507         self.indent_char = None
1508         self.indent_level = self.previous_indent_level = 0
1509         self.previous_logical = ''
1510         self.tokens = []
1511         self.blank_lines = self.blank_before = 0
1512         parens = 0
1513         for token in self.generate_tokens():
1514             self.tokens.append(token)
1515             token_type, text = token[0:2]
1516             if self.verbose >= 3:
1517                 if token[2][0] == token[3][0]:
1518                     pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
1519                 else:
1520                     pos = 'l.%s' % token[3][0]
1521                 print('l.%s\t%s\t%s\t%r' %
1522                       (token[2][0], pos, tokenize.tok_name[token[0]], text))
1523             if token_type == tokenize.OP:
1524                 if text in '([{':
1525                     parens += 1
1526                 elif text in '}])':
1527                     parens -= 1
1528             elif not parens:
1529                 if token_type in NEWLINE:
1530                     if token_type == tokenize.NEWLINE:
1531                         self.check_logical()
1532                         self.blank_before = 0
1533                     elif len(self.tokens) == 1:
1534                         # The physical line contains only this token.
1535                         self.blank_lines += 1
1536                         del self.tokens[0]
1537                     else:
1538                         self.check_logical()
1539                 elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
1540                     if len(self.tokens) == 1:
1541                         # The comment also ends a physical line
1542                         token = list(token)
1543                         token[1] = text.rstrip('\r\n')
1544                         token[3] = (token[2][0], token[2][1] + len(token[1]))
1545                         self.tokens = [tuple(token)]
1546                         self.check_logical()
1547         if self.tokens:
1548             self.check_physical(self.lines[-1])
1549             self.check_logical()
1550         return self.report.get_file_results()
1551
1552
class BaseReport(object):
    """Accumulate counters, messages and timing for a checking run.

    The only per-error output of this class is the file name, printed at
    the first error of a file when ``print_filename`` is true.
    """

    # Whether error() prints the file name at the first error of a file.
    print_filename = False

    def __init__(self, options):
        benchmark_keys = options.benchmark_keys
        self._benchmark_keys = benchmark_keys
        self._ignore_code = options.ignore_code
        # Results
        self.elapsed = 0
        self.total_errors = 0
        self.counters = dict((key, 0) for key in benchmark_keys)
        self.messages = {}

    def start(self):
        """Remember the current time as the start of the run."""
        self._start_time = time.time()

    def stop(self):
        """Store the time elapsed since start() in ``self.elapsed``."""
        now = time.time()
        self.elapsed = now - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Reset the per-file state and count the new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        counters = self.counters
        counters['files'] += 1
        counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Count one more logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Count an error and return its code, or None if not reported.

        The code is the first four characters of ``text``.  Ignored codes
        are not counted at all; expected codes are counted in
        ``self.counters`` but not added to the error totals.
        """
        code = text[:4]
        if self._ignore_code(code):
            return None
        counters = self.counters
        if code not in counters:
            # First occurrence: also remember the message text.
            counters[code] = 1
            self.messages[code] = text[5:]
        else:
            counters[code] += 1
        # Don't care about expected errors or warnings
        if code in self.expected:
            return None
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the number of errors and warnings of the current file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of the codes starting with ``prefix``."""
        total = 0
        for code in self.messages:
            if code.startswith(prefix):
                total += self.counters[code]
        return total

    def get_statistics(self, prefix=''):
        """Return one formatted line per code that starts with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        statistics = []
        for code in sorted(self.messages):
            if not code.startswith(prefix):
                continue
            statistics.append('%-7s %s %s' % (self.counters[code], code,
                                              self.messages[code]))
        return statistics

    def print_statistics(self, prefix=''):
        """Print the lines produced by get_statistics()."""
        for line in self.get_statistics(prefix):
            print(line)

    def print_benchmark(self):
        """Print the elapsed time and the rate of each benchmark key."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if not self.elapsed:
            # No rate can be computed for a zero duration.
            return
        for key in self._benchmark_keys:
            count = self.counters[key]
            print('%-7d %s per second (%d total)' %
                  (count / self.elapsed, key, count))
1641
1642
class FileReport(BaseReport):
    """Collect the results of the checks and print only the filenames."""
    # Turns on the branch of BaseReport.error() that prints the file name
    # when the first error of a file is reported.
    print_filename = True
1646
1647
class StandardReport(BaseReport):
    """Collect the results of the checks and print them file by file."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        fmt = options.format
        # A known format name selects a predefined template; anything
        # else is used as the template itself.
        self._fmt = REPORT_FORMAT.get(fmt.lower(), fmt)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Start a new file with an empty list of pending messages."""
        self._deferred_print = []
        parent = super(StandardReport, self)
        return parent.init_file(filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Count the error and queue it for printing; return its code.

        An error is queued when its code was counted for the first time,
        or every time when the ``repeat`` option is set.
        """
        parent = super(StandardReport, self)
        code = parent.error(line_number, offset, text, check)
        if not code:
            return code
        if self._repeat or self.counters[code] == 1:
            entry = (line_number, offset, code, text[5:], check.__doc__)
            self._deferred_print.append(entry)
        return code

    def get_file_results(self):
        """Print the queued messages in sorted order.

        Returns the number of errors of the current file.
        """
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            fields = {
                'path': self.filename,
                'row': self.line_offset + line_number,
                'col': offset + 1,
                'code': code,
                'text': text,
            }
            print(self._fmt % fields)
            if self._show_source:
                if line_number <= len(self.lines):
                    line = self.lines[line_number - 1]
                else:
                    # The error lies beyond the last line of the file.
                    line = ''
                print(line.rstrip())
                # Caret line: the text before the offset, with every
                # non-blank character replaced by a space.
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print('    ' + doc.strip())
        return self.file_errors
1693
1694
class DiffReport(StandardReport):
    """Standard report limited to the selected (changed) lines."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        # Indexed by file name in error(); each value is tested for
        # membership of a line number.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        """Report the error only if its line is selected for this file."""
        selected_rows = self._selected[self.filename]
        if line_number in selected_rows:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
1706
1707
class StyleGuide(object):
    """Hold the options, the report and the runner of a checking session."""

    def __init__(self, *args, **kwargs):
        # These keywords configure the instance itself; they are removed
        # before the remaining arguments are turned into options.
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        if parse_argv:
            arglist = None
        else:
            arglist = options_dict.get('paths', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser)
        if options_dict:
            # Explicit options override the processed ones.
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            if options.quiet:
                options.reporter = BaseReport
            else:
                options.reporter = StandardReport

        options.select = tuple(options.select or ())
        explicit = (options.select or options.ignore or
                    options.testsuite or options.doctest)
        if DEFAULT_IGNORE and not explicit:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        elif options.select:
            # Ignore all checks which are not explicitly selected
            options.ignore = ('',)
        else:
            options.ignore = tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Create the report, store it in the options and return it.

        ``reporter`` is the class (or factory) to call with the options;
        when omitted, ``options.reporter`` is used.
        """
        report_class = reporter or self.options.reporter
        report = report_class(self.options)
        self.options.report = report
        return report

    def check_files(self, paths=None):
        """Check every path (``self.paths`` by default); return the report.

        Directories are walked with input_dir(); other paths are handed to
        the runner unless they are excluded.
        """
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                    continue
                if self.excluded(path):
                    continue
                runner(path)
        except KeyboardInterrupt:
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Check one file with a new checker and return its result."""
        options = self.options
        if options.verbose:
            print('checking %s' % filename)
        checker = self.checker_class(filename, lines=lines, options=options)
        return checker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Walk the directory tree and run the checks on matching files."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Removing entries from "dirs" in place keeps os.walk() from
            # descending into the excluded sub-directories.
            pruned = [name for name in sorted(dirs)
                      if self.excluded(name, root)]
            for name in pruned:
                dirs.remove(name)
            for filename in sorted(files):
                if not filename_match(filename, filepatterns):
                    continue
                if self.excluded(filename, root):
                    continue
                runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Tell whether a pattern of 'options.exclude' matches the file.

        The base name is tried first, then the absolute path (built with
        ``parent`` when it is given).
        """
        patterns = self.options.exclude
        if not patterns:
            return False
        if filename_match(os.path.basename(filename), patterns):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        return filename_match(os.path.abspath(filename), patterns)

    def ignore_code(self, code):
        """Tell whether the error code should be ignored.

        A code matched by a prefix in 'options.select' is never ignored.
        Otherwise it is ignored if a prefix in 'options.ignore' matches.
        """
        if len(code) < 4:
            # A partial code is kept as soon as it is the beginning of a
            # selected code.
            for selected in self.options.select:
                if selected.startswith(code):
                    return False
        if not code.startswith(self.options.ignore):
            return False
        return not code.startswith(self.options.select)

    def get_checks(self, argument_name):
        """Return the sorted checks registered for this category.

        A check is kept if at least one of its codes is empty or is not
        ignored.  Each item is a ``(name, check, args)`` tuple.
        """
        selected = []
        for check, (codes, args) in _checks[argument_name].items():
            for code in codes:
                if not code or not self.ignore_code(code):
                    selected.append((check.__name__, check, args))
                    break
        selected.sort()
        return selected
1841
1842
def get_parser(prog='pep8', version=__version__):
    """Build and return the command-line option parser.

    The parser also gets a ``config_options`` attribute, the list of
    option names that read_config() accepts from configuration files.
    """
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']
    # (flags, settings) pairs, registered below in this order.
    general_options = [
        (('-v', '--verbose'), dict(
            default=0, action='count',
            help="print status messages, or debug with -vv")),
        (('-q', '--quiet'), dict(
            default=0, action='count',
            help="report only file names, or nothing with -qq")),
        (('-r', '--repeat'), dict(
            default=True, action='store_true',
            help="(obsolete) show all occurrences of the same error")),
        (('--first',), dict(
            action='store_false', dest='repeat',
            help="show first occurrence of each error")),
        (('--exclude',), dict(
            metavar='patterns', default=DEFAULT_EXCLUDE,
            help="exclude files or directories which match these "
                 "comma separated patterns (default: %default)")),
        (('--filename',), dict(
            metavar='patterns', default='*.py',
            help="when parsing directories, only check filenames "
                 "matching these comma separated patterns "
                 "(default: %default)")),
        (('--select',), dict(
            metavar='errors', default='',
            help="select errors and warnings (e.g. E,W6)")),
        (('--ignore',), dict(
            metavar='errors', default='',
            help="skip errors and warnings (e.g. E4,W)")),
        (('--show-source',), dict(
            action='store_true',
            help="show source code for each error")),
        (('--show-pep8',), dict(
            action='store_true',
            help="show text of PEP 8 for each error "
                 "(implies --first)")),
        (('--statistics',), dict(
            action='store_true',
            help="count errors and warnings")),
        (('--count',), dict(
            action='store_true',
            help="print total number of errors and warnings "
                 "to standard error and set exit code to 1 if "
                 "total is not null")),
        (('--max-line-length',), dict(
            type='int', metavar='n', default=MAX_LINE_LENGTH,
            help="set maximum allowed line length "
                 "(default: %default)")),
        (('--hang-closing',), dict(
            action='store_true',
            help="hang closing bracket instead of matching "
                 "indentation of opening bracket's line")),
        (('--format',), dict(
            metavar='format', default='default',
            help="set the error format [default|pylint|<custom>]")),
        (('--diff',), dict(
            action='store_true',
            help="report only lines changed according to the "
                 "unified diff received on STDIN")),
    ]
    for flags, settings in general_options:
        parser.add_option(*flags, **settings)

    testing = parser.add_option_group("Testing Options")
    # The test-suite options exist only if the test-suite path is present.
    if os.path.exists(TESTSUITE_PATH):
        testing.add_option('--testsuite', metavar='dir',
                           help="run regression tests from dir")
        testing.add_option('--doctest', action='store_true',
                           help="run doctest on myself")
    testing.add_option('--benchmark', action='store_true',
                       help="measure processing speed")
    return parser
1901
1902
def read_config(options, args, arglist, parser):
    """Merge the user and project configuration into the options.

    The user configuration is the file named by ``options.config``.  The
    project configuration is looked up in the common prefix of ``args``
    and then in its parent folders.  If the section named after
    ``parser.prog`` exists, its values are applied on top of the parser
    defaults and ``arglist`` is parsed again over them, so the command
    line has the last word.  Returns the resulting options, with
    ``doctest`` and ``testsuite`` set to False.
    """
    config = RawConfigParser()

    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    local_dir = os.curdir
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        candidates = [os.path.join(parent, name) for name in PROJECT_CONFIG]
        if config.read(candidates):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        # Nothing readable here: retry one folder up.
        parent, tail = os.path.split(parent)

    section = parser.prog
    if not config.has_section(section):
        options.doctest = options.testsuite = False
        return options

    # Kind of each option: its type, or its action when it has no type.
    option_kinds = {}
    for option in parser.option_list:
        option_kinds[option.dest] = option.type or option.action

    # First, read the default values
    new_options, _leftover = parser.parse_args([])

    # Second, parse the configuration
    for opt in config.options(section):
        if opt.replace('_', '-') not in parser.config_options:
            print("  unknown option '%s' ignored" % opt)
            continue
        if options.verbose > 1:
            print("  %s = %s" % (opt, config.get(section, opt)))
        normalized_opt = opt.replace('-', '_')
        kind = option_kinds[normalized_opt]
        if kind in ('int', 'count'):
            value = config.getint(section, opt)
        elif kind == 'string':
            value = config.get(section, opt)
            if normalized_opt == 'exclude':
                value = normalize_paths(value, local_dir)
        else:
            assert kind in ('store_true', 'store_false')
            value = config.getboolean(section, opt)
        setattr(new_options, normalized_opt, value)

    # Third, overwrite with the command-line options
    options, _leftover = parser.parse_args(arglist, values=new_options)
    options.doctest = options.testsuite = False
    return options
1955
1956
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Parse the options and return an ``(options, args)`` pair.

    The options come from ``arglist`` or, when ``parse_argv`` is true and
    ``arglist`` is None, from the command line.  ``config_file`` becomes
    the default of the ``--config`` option, which is added if the parser
    does not have it yet; ``True`` stands for the default location.
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        if config_file is True:
            config_file = DEFAULT_CONFIG
        description = (
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options)))
        group = parser.add_option_group("Configuration",
                                        description=description)
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location (default: %default)")
    # Don't read the command line if the module is used as a library.
    if not (arglist or parse_argv):
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    options, args = parser.parse_args(arglist)
    options.reporter = None

    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            has_project_config = options.diff or any(
                os.path.exists(name) for name in PROJECT_CONFIG)
            if has_project_config:
                # Default to the current directory.
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
        options.reporter = parse_argv and options.quiet == 1 and FileReport

    def _split_csv(value):
        # Split a comma separated value; a false value is kept as is.
        return value and value.split(',')

    options.filename = _split_csv(options.filename)
    options.exclude = normalize_paths(options.exclude)
    options.select = _split_csv(options.select)
    options.ignore = _split_csv(options.ignore)

    if options.diff:
        # The diff read from standard input decides which files and
        # lines are reported.
        options.reporter = DiffReport
        diff_text = stdin_get_value()
        options.selected_lines = parse_udiff(
            diff_text, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
2004
2005
def _main():
    """Command-line entry point: parse the options and run the checks.

    Exits with status 1 if the report counts any error.
    """
    import signal

    def _exit_on_broken_pipe(signum, frame):
        sys.exit(1)

    # Handle "Broken pipe" gracefully
    try:
        signal.signal(signal.SIGPIPE, _exit_on_broken_pipe)
    except AttributeError:
        pass    # not supported on Windows

    pep8style = StyleGuide(parse_argv=True, config_file=True)
    options = pep8style.options
    if not (options.doctest or options.testsuite):
        report = pep8style.check_files()
    else:
        from testsuite.support import run_tests
        report = run_tests(pep8style)
    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()
    if options.testsuite and not options.quiet:
        report.print_results()
    error_count = report.total_errors
    if not error_count:
        return
    if options.count:
        sys.stderr.write(str(error_count) + '\n')
    sys.exit(1)
2033
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    _main()