diff --git a/mypy/build.py b/mypy/build.py index 96ba59dd10956..08523c5d3ea9a 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -27,8 +27,10 @@ import time import types from collections.abc import Callable, Iterator, Mapping, Sequence, Set as AbstractSet +from concurrent.futures import ThreadPoolExecutor, wait from heapq import heappop, heappush from textwrap import dedent +from threading import Lock from typing import ( TYPE_CHECKING, Any, @@ -116,6 +118,7 @@ ImportBase, ImportFrom, MypyFile, + ParseError, SymbolTable, ) from mypy.options import OPTIONS_AFFECTING_CACHE_NO_PLATFORM @@ -125,6 +128,7 @@ from mypy.util import ( DecodeError, decode_python_encoding, + get_available_threads, get_mypy_comments, hash_digest, hash_digest_bytes, @@ -161,7 +165,7 @@ ) from mypy.nodes import Expression from mypy.options import Options -from mypy.parse import load_from_raw, parse +from mypy.parse import load_from_raw, parse, report_parse_error from mypy.plugin import ChainedPlugin, Plugin, ReportConfigContext from mypy.plugins.default import DefaultPlugin from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor @@ -800,6 +804,8 @@ def __init__( parallel_worker: bool = False, ) -> None: self.stats: dict[str, Any] = {} # Values are ints or floats + # Use in cases where we need to prevent race conditions in stats reporting. + self.stats_lock = Lock() self.stdout = stdout self.stderr = stderr self.start_time = time.time() @@ -952,6 +958,75 @@ def dump_stats(self) -> None: for key, value in sorted(self.stats_summary().items()): print(f"{key + ':':24}{value}") + def parse_all(self, states: list[State]) -> None: + """Parse multiple files in parallel (if possible) and compute dependencies. + + Note: this duplicates a bit of logic from State.parse_file(). This is done + as a micro-optimization to parallelize only those parts of the code that + can be parallelized efficiently. + """ + if self.options.native_parser: + futures = [] + parsed_states = {} + # Use at least --num-workers if specified by user. + available_threads = max(get_available_threads(), self.options.num_workers) + # Overhead from trying to parallelize (small) blocking portion of + # parse_file_inner() results in no visible improvement with more than 8 threads. + # TODO: reuse thread pool and/or batch small files in single submit() call. + with ThreadPoolExecutor(max_workers=min(available_threads, 8)) as executor: + for state in states: + state.needs_parse = False + if state.tree is not None: + # The file was already parsed. + continue + # New parser reads source from file directly, we do this only for + # the side effect of parsing inline mypy configurations. + state.get_source() + if state.id not in self.ast_cache: + self.log(f"Parsing {state.xpath} ({state.id})") + ignore_errors = state.ignore_all or state.options.ignore_errors + if ignore_errors: + self.errors.ignored_files.add(state.xpath) + futures.append(executor.submit(state.parse_file_inner, state.source or "")) + parsed_states[state.id] = state + else: + self.log(f"Using cached AST for {state.xpath} ({state.id})") + state.tree, state.early_errors = self.ast_cache[state.id] + for fut in wait(futures).done: + state_id, parse_errors = fut.result() + if parse_errors: + state = parsed_states[state_id] + with state.wrap_context(): + self.errors.set_file(state.xpath, state.id, options=state.options) + for error in parse_errors: + # New parser reports errors lazily. + report_parse_error(error, self.errors) + if self.errors.is_blockers(): + self.log("Bailing due to parse errors") + self.errors.raise_error() + + for state in states: + assert state.tree is not None + if state.id in parsed_states: + state.early_errors = list(self.errors.error_info_map.get(state.xpath, [])) + state.semantic_analysis_pass1() + self.ast_cache[state.id] = (state.tree, state.early_errors) + self.modules[state.id] = state.tree + state.check_blockers() + state.setup_errors() + else: + # Old parser cannot be parallelized. + for state in states: + state.parse_file() + + for state in states: + state.compute_dependencies() + if self.workers and state.tree: + # We don't need imports in coordinator process anymore, we parse only to + # compute dependencies. + state.tree.imports = [] + del self.ast_cache[state.id] + def use_fine_grained_cache(self) -> bool: return self.cache_enabled and self.options.use_fine_grained_cache @@ -1069,10 +1144,9 @@ def parse_file( id: str, path: str, source: str, - ignore_errors: bool, options: Options, raw_data: FileRawData | None = None, - ) -> MypyFile: + ) -> tuple[MypyFile, list[ParseError]]: """Parse the source of a file with the given name. Raise CompileError if there is a parse error. @@ -1082,25 +1156,24 @@ def parse_file( # Currently, we can use the native parser only for actual files. imports_only = True t0 = time.time() - if ignore_errors: - self.errors.ignored_files.add(path) + parse_errors: list[ParseError] = [] if raw_data: # If possible, deserialize from known binary data instead of parsing from scratch. tree = load_from_raw(path, id, raw_data, self.errors, options) else: - tree = parse(source, path, id, self.errors, options=options, imports_only=imports_only) + tree, parse_errors = parse( + source, path, id, self.errors, options=options, imports_only=imports_only + ) tree._fullname = id - self.add_stats( - files_parsed=1, - modules_parsed=int(not tree.is_stub), - stubs_parsed=int(tree.is_stub), - parse_time=time.time() - t0, - ) - - if self.errors.is_blockers(): - self.log("Bailing due to parse errors") - self.errors.raise_error() - return tree + if self.stats_enabled: + with self.stats_lock: + self.add_stats( + files_parsed=1, + modules_parsed=int(not tree.is_stub), + stubs_parsed=int(tree.is_stub), + parse_time=time.time() - t0, + ) + return tree, parse_errors def load_fine_grained_deps(self, id: str) -> dict[str, set[str]]: t0 = time.time() @@ -2509,8 +2582,7 @@ def new_state( # we need to re-calculate dependencies. # NOTE: see comment below for why we skip this in fine-grained mode. if exist_added_packages(suppressed, manager): - state.parse_file() # This is safe because the cache is anyway stale. - state.compute_dependencies() + state.needs_parse = True # This is safe because the cache is anyway stale. # This is an inverse to the situation above. If we had an import like this: # from pkg import mod # and then mod was deleted, we need to force recompute dependencies, to @@ -2519,8 +2591,7 @@ def new_state( # import pkg # import pkg.mod if exist_removed_submodules(dependencies, manager): - state.parse_file() # Same as above, the current state is stale anyway. - state.compute_dependencies() + state.needs_parse = True # Same as above, the current state is stale anyway. state.size_hint = meta.size else: # When doing a fine-grained cache load, pretend we only @@ -2530,14 +2601,17 @@ def new_state( manager.log(f"Deferring module to fine-grained update {path} ({id})") raise ModuleNotFound - # Parse the file (and then some) to get the dependencies. - state.parse_file(temporary=temporary) - state.compute_dependencies() - if manager.workers and state.tree: - # We don't need imports in coordinator process anymore, we parse only to - # compute dependencies. - state.tree.imports = [] - del manager.ast_cache[id] + if temporary: + # Eagerly parse temporary states, they are needed rarely. + state.parse_file(temporary=True) + state.compute_dependencies() + if state.manager.workers and state.tree: + # We don't need imports in coordinator process anymore, we parse only to + # compute dependencies. + state.tree.imports = [] + del state.manager.ast_cache[state.id] + else: + state.needs_parse = True return state @@ -2600,6 +2674,8 @@ def __init__( # Pre-computed opaque value of suppressed_deps_opts() used # to minimize amount of data sent to parallel workers. self.known_suppressed_deps_opts: bytes | None = None + # An internal flag used by build manager to schedule states for parsing. + self.needs_parse = False def write(self, buf: WriteBuffer) -> None: """Serialize State for sending to build worker. @@ -2835,26 +2911,9 @@ def fix_cross_refs(self) -> None: # Methods for processing modules from source code. - def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = None) -> None: - """Parse file and run first pass of semantic analysis. - - Everything done here is local to the file. Don't depend on imported - modules in any way. Also record module dependencies based on imports. - """ - if self.tree is not None: - # The file was already parsed (in __init__()). - return - + def get_source(self) -> str: + """Get module source and parse inline mypy configurations.""" manager = self.manager - - # Can we reuse a previously parsed AST? This avoids redundant work in daemon. - cached = self.id in manager.ast_cache - modules = manager.modules - if not cached: - manager.log(f"Parsing {self.xpath} ({self.id})") - else: - manager.log(f"Using cached AST for {self.xpath} ({self.id})") - t0 = time_ref() with self.wrap_context(): @@ -2896,33 +2955,60 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = self.check_for_invalid_options() self.size_hint = len(source) - if not cached: - ignore_errors = self.ignore_all or self.options.ignore_errors - self.tree = manager.parse_file( - self.id, - self.xpath, - source, - ignore_errors=ignore_errors, - options=self.options, - raw_data=raw_data, - ) - else: - # Reuse a cached AST - self.tree = manager.ast_cache[self.id][0] + self.time_spent_us += time_spent_us(t0) + return source + def parse_file_inner( + self, source: str, raw_data: FileRawData | None = None + ) -> tuple[str, list[ParseError]]: + t0 = time_ref() + self.tree, parse_errors = self.manager.parse_file( + self.id, self.xpath, source, options=self.options, raw_data=raw_data + ) self.time_spent_us += time_spent_us(t0) + return self.id, parse_errors + + def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = None) -> None: + """Parse file and run first pass of semantic analysis. + + Everything done here is local to the file. Don't depend on imported + modules in any way. Logic here should be kept in sync with BuildManager.parse_all(). + """ + self.needs_parse = False + if self.tree is not None: + # The file was already parsed. + return - if not cached: + source = self.get_source() + manager = self.manager + # Can we reuse a previously parsed AST? This avoids redundant work in daemon. + if self.id not in manager.ast_cache: + self.manager.log(f"Parsing {self.xpath} ({self.id})") + ignore_errors = self.ignore_all or self.options.ignore_errors + if ignore_errors: + self.manager.errors.ignored_files.add(self.xpath) + with self.wrap_context(): + manager.errors.set_file(self.xpath, self.id, options=self.options) + _, parse_errors = self.parse_file_inner(source, raw_data) + for error in parse_errors: + # New parser reports errors lazily. + report_parse_error(error, manager.errors) + if manager.errors.is_blockers(): + manager.log("Bailing due to parse errors") + manager.errors.raise_error() # Make a copy of any errors produced during parse time so that # fine-grained mode can repeat them when the module is # reprocessed. self.early_errors = list(manager.errors.error_info_map.get(self.xpath, [])) self.semantic_analysis_pass1() else: - self.early_errors = manager.ast_cache[self.id][1] + # Reuse a cached AST + manager.log(f"Using cached AST for {self.xpath} ({self.id})") + self.tree, self.early_errors = manager.ast_cache[self.id] + assert self.tree is not None if not temporary: - modules[self.id] = self.tree + manager.modules[self.id] = self.tree self.check_blockers() manager.ast_cache[self.id] = (self.tree, self.early_errors) @@ -3094,14 +3180,15 @@ def detect_possibly_undefined_vars(self) -> None: if manager.errors.is_error_code_enabled( codes.POSSIBLY_UNDEFINED ) or manager.errors.is_error_code_enabled(codes.USED_BEFORE_DEF): - self.tree.accept( - PossiblyUndefinedVariableVisitor( - MessageBuilder(manager.errors, manager.modules), - self.type_map(), - self.options, - self.tree.names, + with self.wrap_context(): + self.tree.accept( + PossiblyUndefinedVariableVisitor( + MessageBuilder(manager.errors, manager.modules), + self.type_map(), + self.options, + self.tree.names, + ) ) - ) def finish_passes(self) -> None: assert self.tree is not None, "Internal error: method must be called on parsed file only" @@ -3323,14 +3410,16 @@ def generate_unused_ignore_notes(self) -> None: if self.meta and self.options.fine_grained_incremental: self.verify_dependencies(suppressed_only=True) is_typeshed = self.tree is not None and self.tree.is_typeshed_file(self.options) - self.manager.errors.generate_unused_ignore_errors(self.xpath, is_typeshed) + with self.wrap_context(): + self.manager.errors.generate_unused_ignore_errors(self.xpath, is_typeshed) def generate_ignore_without_code_notes(self) -> None: if self.manager.errors.is_error_code_enabled(codes.IGNORE_WITHOUT_CODE): is_typeshed = self.tree is not None and self.tree.is_typeshed_file(self.options) - self.manager.errors.generate_ignore_without_code_errors( - self.xpath, self.options.warn_unused_ignores, is_typeshed - ) + with self.wrap_context(): + self.manager.errors.generate_ignore_without_code_errors( + self.xpath, self.options.warn_unused_ignores, is_typeshed + ) # Module import and diagnostic glue @@ -3635,12 +3724,14 @@ def skipping_ancestor(manager: BuildManager, id: str, path: str, ancestor_for: S # immediately if it's empty or only contains comments. # But beware, some package may be the ancestor of many modules, # so we'd need to cache the decision. + save_import_context = manager.errors.import_context() manager.errors.set_import_context([]) manager.errors.set_file(ancestor_for.xpath, ancestor_for.id, manager.options) manager.error(None, f'Ancestor package "{id}" ignored', only_once=True) manager.note( None, "(Using --follow-imports=error, submodule passed on command line)", only_once=True ) + manager.errors.set_import_context(save_import_context) def log_configuration(manager: BuildManager, sources: list[BuildSource]) -> None: @@ -3928,6 +4019,7 @@ def load_graph( graph[st.id] = st new.append(st) entry_points.add(bs.module) + manager.parse_all([state for state in new if state.needs_parse]) # Note: Running this each time could be slow in the daemon. If it's a problem, we # can do more work to maintain this incrementally. @@ -3935,7 +4027,16 @@ def load_graph( # Collect dependencies. We go breadth-first. # More nodes might get added to new as we go, but that's fine. + ready = set(new) + # Use list to make syntax error order a bit more stable. + not_ready: list[State] = [] for st in new: + if st not in ready: + # We have run out of states, parse all we have. + assert st in not_ready + manager.parse_all(not_ready) + ready.update(not_ready) + not_ready.clear() assert st.ancestors is not None # Strip out indirect dependencies. These will be dealt with # when they show up as direct dependencies, and there's a @@ -3991,6 +4092,7 @@ def load_graph( newst_path = newst.abspath if newst_path in seen_files: + manager.errors.set_file(newst.xpath, newst.id, manager.options) manager.error( None, "Source file found twice under different module names: " @@ -4011,6 +4113,10 @@ def load_graph( assert newst.id not in graph, newst.id graph[newst.id] = newst new.append(newst) + if newst.needs_parse: + not_ready.append(newst) + else: + ready.add(newst) # There are two things we need to do after the initial load loop. One is up-suppress # modules that are back in graph. We need to do this after the loop to cover edge cases # like where a namespace package ancestor is shared by a typed and an untyped package. diff --git a/mypy/checkstrformat.py b/mypy/checkstrformat.py index 55605274aa1f1..47e45c5c602f0 100644 --- a/mypy/checkstrformat.py +++ b/mypy/checkstrformat.py @@ -39,13 +39,12 @@ MemberExpr, MypyFile, NameExpr, - Node, StarExpr, StrExpr, TempNode, TupleExpr, ) -from mypy.parse import parse +from mypy.parse import parse, report_parse_error from mypy.subtypes import is_subtype from mypy.typeops import custom_special_method from mypy.types import ( @@ -582,9 +581,12 @@ def apply_field_accessors( temp_errors = Errors(self.chk.options) dummy = DUMMY_FIELD_NAME + spec.field[len(spec.key) :] - temp_ast: Node = parse( + temp_ast, parse_errors = parse( dummy, fnam="", module=None, options=self.chk.options, errors=temp_errors ) + for error in parse_errors: + # New parser reports errors lazily. + report_parse_error(error, temp_errors) if temp_errors.is_errors(): self.msg.fail( f'Syntax error in format specifier "{spec.field}"', diff --git a/mypy/metastore.py b/mypy/metastore.py index 3d32ba29ae107..1a2a7b335e72b 100644 --- a/mypy/metastore.py +++ b/mypy/metastore.py @@ -157,7 +157,7 @@ def close(self) -> None: def connect_db(db_file: str) -> sqlite3.Connection: import sqlite3.dbapi2 - db = sqlite3.dbapi2.connect(db_file) + db = sqlite3.dbapi2.connect(db_file, check_same_thread=False) # This is a bit unfortunate (as we may get corrupt cache after e.g. Ctrl + C), # but without this flag, commits are *very* slow, especially when using HDDs, # see https://www.sqlite.org/faq.html#q19 for details. diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index c976ba9284401..2e57016e9d4a8 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -20,7 +20,8 @@ from __future__ import annotations import os -from typing import Any, Final, cast +import time +from typing import Final, cast import ast_serialize # type: ignore[import-untyped, import-not-found, unused-ignore] from librt.internal import ( @@ -101,6 +102,7 @@ OpExpr, OverloadedFuncDef, OverloadPart, + ParseError, PassStmt, RaiseStmt, RefExpr, @@ -168,17 +170,11 @@ class State: def __init__(self, options: Options) -> None: self.options = options - self.errors: list[dict[str, Any]] = [] + self.errors: list[ParseError] = [] self.num_funcs = 0 def add_error( - self, - message: str, - line: int, - column: int, - *, - blocker: bool = False, - code: str | None = None, + self, message: str, line: int, column: int, *, blocker: bool = False, code: str ) -> None: """Report an error at a specific location. @@ -196,7 +192,7 @@ def add_error( def native_parse( filename: str, options: Options, skip_function_bodies: bool = False, imports_only: bool = False -) -> tuple[MypyFile, list[dict[str, Any]], TypeIgnores]: +) -> tuple[MypyFile, list[ParseError], TypeIgnores]: """Parse a Python file using the native Rust-based parser. Uses the ast_serialize Rust extension to parse Python code and deserialize @@ -214,7 +210,7 @@ def native_parse( Returns: A tuple containing: - MypyFile: The parsed AST as a mypy AST node - - list[dict[str, Any]]: List of parse errors and deserialization errors + - list[ParseError]: List of parse errors and deserialization errors - TypeIgnores: List of (line_number, ignored_codes) tuples for type: ignore comments """ # If the path is a directory, return empty AST (matching fastparse behavior) @@ -272,7 +268,11 @@ def read_statements(state: State, data: ReadBuffer, n: int) -> list[Statement]: def parse_to_binary_ast( filename: str, options: Options, skip_function_bodies: bool = False -) -> tuple[bytes, list[dict[str, Any]], TypeIgnores, bytes, bool, bool]: +) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool]: + # This is a horrible hack to work around a mypyc bug where imported + # module may be not ready in a thread sometimes. + while ast_serialize is None: + time.sleep(0.0001) # type: ignore[unreachable] ast_bytes, errors, ignores, import_bytes, ast_data = ast_serialize.parse( filename, skip_function_bodies=skip_function_bodies, @@ -284,7 +284,7 @@ def parse_to_binary_ast( ) return ( ast_bytes, - cast("list[dict[str, Any]]", errors), + errors, ignores, import_bytes, ast_data["is_partial_package"], diff --git a/mypy/nodes.py b/mypy/nodes.py index 46d1c870d0556..a8dd366c0e3ee 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -14,11 +14,13 @@ Final, Optional, TypeAlias as _TypeAlias, + TypedDict, TypeGuard, TypeVar, Union, cast, ) +from typing_extensions import NotRequired from librt.internal import ( extract_symbol, @@ -39,7 +41,9 @@ LIST_GEN, LIST_STR, LITERAL_COMPLEX, + LITERAL_FALSE, LITERAL_NONE, + LITERAL_TRUE, ReadBuffer, Tag, WriteBuffer, @@ -313,6 +317,39 @@ def read(cls, data: ReadBuffer) -> SymbolNode: Definition: _TypeAlias = tuple[str, "SymbolTableNode", Optional["TypeInfo"]] +class ParseError(TypedDict): + line: int + column: int + message: str + blocker: NotRequired[bool] + code: NotRequired[str] + + +def write_parse_error(data: WriteBuffer, err: ParseError) -> None: + write_int(data, err["line"]) + write_int(data, err["column"]) + write_str(data, err["message"]) + if (blocker := err.get("blocker")) is not None: + write_bool(data, blocker) + else: + write_tag(data, LITERAL_NONE) + write_str_opt(data, err.get("code")) + + +def read_parse_error(data: ReadBuffer) -> ParseError: + err: ParseError = {"line": read_int(data), "column": read_int(data), "message": read_str(data)} + tag = read_tag(data) + if tag == LITERAL_TRUE: + err["blocker"] = True + elif tag == LITERAL_FALSE: + err["blocker"] = False + else: + assert tag == LITERAL_NONE + if (code := read_str_opt(data)) is not None: + err["code"] = code + return err + + class FileRawData: """Raw (binary) data representing parsed, but not deserialized file.""" @@ -327,7 +364,7 @@ class FileRawData: defs: bytes imports: bytes - raw_errors: list[dict[str, Any]] # TODO: switch to more precise type here. + raw_errors: list[ParseError] ignored_lines: dict[int, list[str]] is_partial_stub_package: bool uses_template_strings: bool @@ -336,7 +373,7 @@ def __init__( self, defs: bytes, imports: bytes, - raw_errors: list[dict[str, Any]], + raw_errors: list[ParseError], ignored_lines: dict[int, list[str]], is_partial_stub_package: bool, uses_template_strings: bool, @@ -354,7 +391,7 @@ def write(self, data: WriteBuffer) -> None: write_tag(data, LIST_GEN) write_int_bare(data, len(self.raw_errors)) for err in self.raw_errors: - write_json(data, err) + write_parse_error(data, err) write_tag(data, DICT_INT_GEN) write_int_bare(data, len(self.ignored_lines)) for line, codes in self.ignored_lines.items(): @@ -368,7 +405,7 @@ def read(cls, data: ReadBuffer) -> FileRawData: defs = read_bytes(data) imports = read_bytes(data) assert read_tag(data) == LIST_GEN - raw_errors = [read_json(data) for _ in range(read_int_bare(data))] + raw_errors = [read_parse_error(data) for _ in range(read_int_bare(data))] assert read_tag(data) == DICT_INT_GEN ignored_lines = {read_int(data): read_str_list(data) for _ in range(read_int_bare(data))} return FileRawData( diff --git a/mypy/parse.py b/mypy/parse.py index 093653553137f..d1fbecaabde92 100644 --- a/mypy/parse.py +++ b/mypy/parse.py @@ -8,7 +8,7 @@ from mypy import errorcodes as codes from mypy.cache import read_int from mypy.errors import Errors -from mypy.nodes import FileRawData, MypyFile +from mypy.nodes import FileRawData, MypyFile, ParseError from mypy.options import Options @@ -18,9 +18,8 @@ def parse( module: str | None, errors: Errors, options: Options, - raise_on_error: bool = False, imports_only: bool = False, -) -> MypyFile: +) -> tuple[MypyFile, list[ParseError]]: """Parse a source file, without doing any semantic analysis. Return the parse tree. If errors is not provided, raise ParseError @@ -37,8 +36,6 @@ def parse( ignore_errors = options.ignore_errors or fnam in errors.ignored_files # If errors are ignored, we can drop many function bodies to speed up type checking. strip_function_bodies = ignore_errors and not options.preserve_asts - - errors.set_file(fnam, module, options=options) tree, parse_errors, type_ignores = mypy.nativeparse.native_parse( fnam, options, @@ -51,26 +48,7 @@ def parse( tree.is_stub = fnam.endswith(".pyi") # Note: tree.imports is populated directly by native_parse with deserialized # import metadata, so we don't need to collect imports via AST traversal - - # Report parse errors - for error in parse_errors: - message = error["message"] - # Standardize error message by capitalizing the first word - message = re.sub(r"^(\s*\w)", lambda m: m.group(1).upper(), message) - # Respect blocker status from error, default to True for syntax errors - is_blocker = error.get("blocker", True) - error_code = error.get("code") - if error_code is None: - error_code = codes.SYNTAX - else: - # Fallback to [syntax] for backwards compatibility. - error_code = codes.error_codes.get(error_code) or codes.SYNTAX - errors.report( - error["line"], error["column"], message, blocker=is_blocker, code=error_code - ) - if raise_on_error and errors.is_errors(): - errors.raise_error() - return tree + return tree, parse_errors # Fall through to fastparse for non-existent files assert not imports_only @@ -79,9 +57,7 @@ def parse( import mypy.fastparse tree = mypy.fastparse.parse(source, fnam=fnam, module=module, errors=errors, options=options) - if raise_on_error and errors.is_errors(): - errors.raise_error() - return tree + return tree, [] def load_from_raw( @@ -112,14 +88,21 @@ def load_from_raw( all_errors = raw_data.raw_errors + state.errors errors.set_file(fnam, module, options=options) for error in all_errors: - message = error["message"] - message = re.sub(r"^(\s*\w)", lambda m: m.group(1).upper(), message) - is_blocker = error.get("blocker", True) - error_code = error.get("code") - if error_code is None: - error_code = codes.SYNTAX - else: - error_code = codes.error_codes.get(error_code) or codes.SYNTAX # Note we never raise in this function, so it should not be called in coordinator. - errors.report(error["line"], error["column"], message, blocker=is_blocker, code=error_code) + report_parse_error(error, errors) return tree + + +def report_parse_error(error: ParseError, errors: Errors) -> None: + message = error["message"] + # Standardize error message by capitalizing the first word + message = re.sub(r"^(\s*\w)", lambda m: m.group(1).upper(), message) + # Respect blocker status from error, default to True for syntax errors + is_blocker = error.get("blocker", True) + error_code = error.get("code") + if error_code is None: + error_code = codes.SYNTAX + else: + # Fallback to [syntax] for backwards compatibility. + error_code = codes.error_codes.get(error_code) or codes.SYNTAX + errors.report(error["line"], error["column"], message, blocker=is_blocker, code=error_code) diff --git a/mypy/semanal_main.py b/mypy/semanal_main.py index edc6ee4143f29..0f2a1e3f30ea3 100644 --- a/mypy/semanal_main.py +++ b/mypy/semanal_main.py @@ -463,17 +463,18 @@ def apply_class_plugin_hooks(graph: Graph, scc: list[str], errors: Errors) -> No state = graph[module] tree = state.tree assert tree - for _, node, _ in tree.local_definitions(): - if isinstance(node.node, TypeInfo): - if not apply_hooks_to_class( - state.manager.semantic_analyzer, - module, - node.node, - state.options, - tree, - errors, - ): - incomplete = True + with state.wrap_context(): + for _, node, _ in tree.local_definitions(): + if isinstance(node.node, TypeInfo): + if not apply_hooks_to_class( + state.manager.semantic_analyzer, + module, + node.node, + state.options, + tree, + errors, + ): + incomplete = True def apply_hooks_to_class( @@ -524,7 +525,10 @@ def calculate_class_properties(graph: Graph, scc: list[str], errors: Errors) -> assert tree for _, node, _ in tree.local_definitions(): if isinstance(node.node, TypeInfo): - with state.manager.semantic_analyzer.file_context(tree, state.options, node.node): + with ( + state.wrap_context(), + state.manager.semantic_analyzer.file_context(tree, state.options, node.node), + ): calculate_class_abstract_status(node.node, tree.is_stub, errors) check_protocol_status(node.node, errors) calculate_class_vars(node.node) diff --git a/mypy/stubgen.py b/mypy/stubgen.py index ce6335e9e34f9..bbe286e80cd7e 100755 --- a/mypy/stubgen.py +++ b/mypy/stubgen.py @@ -1744,10 +1744,12 @@ def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None: data = f.read() source = mypy.util.decode_python_encoding(data) errors = Errors(mypy_options) - mod.ast = mypy.parse.parse( + mod.ast, errs = mypy.parse.parse( source, fnam=mod.path, module=mod.module, errors=errors, options=mypy_options ) mod.ast._fullname = mod.module + for err in errs: + mypy.parse.report_parse_error(err, errors) if errors.is_blockers(): # Syntax error! for m in errors.new_messages(): diff --git a/mypy/test/test_nativeparse.py b/mypy/test/test_nativeparse.py index d3afbff0ed430..94be60e328b7d 100644 --- a/mypy/test/test_nativeparse.py +++ b/mypy/test/test_nativeparse.py @@ -11,7 +11,6 @@ import tempfile import unittest from collections.abc import Iterator -from typing import Any from mypy import defaults, nodes from mypy.cache import ( @@ -25,7 +24,7 @@ ) from mypy.config_parser import parse_mypy_comments from mypy.errors import CompileError -from mypy.nodes import MypyFile +from mypy.nodes import MypyFile, ParseError from mypy.options import Options from mypy.test.data import DataDrivenTestCase, DataSuite from mypy.test.helpers import assert_string_arrays_equal @@ -102,7 +101,7 @@ def test_parser(testcase: DataDrivenTestCase) -> None: ) -def format_error(err: dict[str, Any]) -> str: +def format_error(err: ParseError) -> str: return f"{err['line']}:{err['column']}: error: {err['message']}" diff --git a/mypy/test/testgraph.py b/mypy/test/testgraph.py index 491fcf427e65d..aec6576189661 100644 --- a/mypy/test/testgraph.py +++ b/mypy/test/testgraph.py @@ -117,6 +117,7 @@ def test_sorted_components(self) -> None: "c": State.new_state("c", None, "import b, d", manager), "builtins": State.new_state("builtins", None, "", manager), } + manager.parse_all(list(graph.values())) res = [scc.mod_ids for scc in sorted_components(graph)] assert_equal(res, [{"builtins"}, {"d"}, {"c", "b"}, {"a"}]) @@ -129,6 +130,7 @@ def test_order_ascc(self) -> None: "c": State.new_state("c", None, "import b, d", manager), "builtins": State.new_state("builtins", None, "", manager), } + manager.parse_all(list(graph.values())) res = [scc.mod_ids for scc in sorted_components(graph)] assert_equal(res, [{"builtins"}, {"a", "d", "c", "b"}]) ascc = res[1] diff --git a/mypy/test/testparse.py b/mypy/test/testparse.py index 00f234f9088ed..5c301a1858592 100644 --- a/mypy/test/testparse.py +++ b/mypy/test/testparse.py @@ -10,7 +10,7 @@ from mypy.config_parser import parse_mypy_comments from mypy.errors import CompileError, Errors from mypy.options import Options -from mypy.parse import parse +from mypy.parse import parse, report_parse_error from mypy.test.data import DataDrivenTestCase, DataSuite from mypy.test.helpers import assert_string_arrays_equal, find_test_files, parse_options from mypy.util import get_mypy_comments @@ -59,14 +59,14 @@ def test_parser(testcase: DataDrivenTestCase) -> None: options = options.apply_changes(changes) try: - n = parse( - bytes(source, "ascii"), - fnam="main", - module="__main__", - errors=Errors(options), - options=options, - raise_on_error=True, + errors = Errors(options) + n, errs = parse( + bytes(source, "ascii"), fnam="main", module="__main__", errors=errors, options=options ) + for err in errs: + report_parse_error(err, errors) + if errors.is_errors(): + errors.raise_error() a = n.str_with_options(options).split("\n") except CompileError as e: a = e.messages @@ -97,14 +97,18 @@ def test_parse_error(testcase: DataDrivenTestCase) -> None: if options.python_version != sys.version_info[:2]: skip() # Compile temporary file. The test file contains non-ASCII characters. - parse( + errors = Errors(options) + _, errs = parse( bytes("\n".join(testcase.input), "utf-8"), INPUT_FILE_NAME, "__main__", - errors=Errors(options), + errors=errors, options=options, - raise_on_error=True, ) + for err in errs: + report_parse_error(err, errors) + if errors.is_errors(): + errors.raise_error() raise AssertionError("No errors reported") except CompileError as e: if e.module_with_blocker is not None: diff --git a/mypy/util.py b/mypy/util.py index 86998380ff0aa..916e2629e2b10 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -28,6 +28,13 @@ except ImportError: CURSES_ENABLED = False +try: + import psutil + + PSUTIL_AVAILABLE = True +except ImportError: + PSUTIL_AVAILABLE = False + T = TypeVar("T") TYPESHED_DIR: Final = str(importlib_resources.files("mypy") / "typeshed") @@ -959,3 +966,46 @@ def json_loads(data: bytes) -> Any: if orjson is not None: return orjson.loads(data) return json.loads(data) + + +_AVAILABLE_THREADS: int | None = None + + +def get_available_threads() -> int: + """Determine number of physical cores that current process can use (best effort).""" + global _AVAILABLE_THREADS + if _AVAILABLE_THREADS is not None: + return _AVAILABLE_THREADS + + # This takes into account -X cpu_count and/or PYTHON_CPU_COUNT, but always + # counts virtual cores (which is not what we want for CPU bound tasks). + os_cpu_count = os.cpu_count() + if PSUTIL_AVAILABLE: + # Unlike os, psutil can determine number of physical cores. + psutil_cpu_count = psutil.cpu_count(logical=False) + else: + psutil_cpu_count = None + + if psutil_cpu_count and os_cpu_count: + cpu_count = min(psutil_cpu_count, os_cpu_count) + elif psutil_cpu_count or os_cpu_count: + cpu_count = psutil_cpu_count or os_cpu_count + else: + # A conservative fallback in case we cannot determine CPU count in any way. + cpu_count = 4 + + affinity = None + # Not available on old Python versions on some platforms. + if sys.platform == "linux": + affinity = os.sched_getaffinity(0) + if PSUTIL_AVAILABLE and sys.platform != "darwin": + # Currently not supported on macOS. + affinity = psutil.Process().cpu_affinity() + + assert cpu_count is not None + if affinity: + available_threads = min(cpu_count, len(affinity)) + else: + available_threads = cpu_count + _AVAILABLE_THREADS = available_threads + return available_threads