blender-python-stubs/introspect.py
Joseph HENRY 852a5de700 Initial commit
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 13:00:51 +01:00

1947 lines
69 KiB
Python

"""Introspection script that runs inside Blender headless.
Usage: blender --background --factory-startup -noaudio --python introspect.py
"""
import argparse
import importlib
import inspect
import json
import pkgutil
import re
import sys
from collections.abc import Callable
from dataclasses import dataclass
from types import ModuleType
from typing import TypedDict, cast
# Top-level Blender Python modules that are importable and discoverable
# via pkgutil module walking.
BLENDER_MODULES = [
    "aud",
    "bl_math",
    "blf",
    "bmesh",
    "bpy",
    "bpy_extras",
    "freestyle",
    "gpu",
    "gpu_extras",
    "idprop",
    "imbuf",
    "mathutils",
]
# Virtual modules not discoverable via pkgutil (C-level or RNA-defined);
# these submodules must be listed explicitly by dotted name.
EXTRA_MODULES = [
    "bpy.types",
    "bpy.props",
    "bpy.app",
    "bmesh.types",
    "gpu.types",
    "imbuf.types",
    "idprop.types",
]
# Hardcoded types for screen context members that are None in headless mode.
# These are dynamically injected by Blender based on the active editor/mode,
# so their types cannot be discovered by runtime introspection; the values
# here are bare bpy.types names (or Sequence[...] of them) for the stubs.
SCREEN_CONTEXT_TYPE_OVERRIDES: dict[str, str] = {
    "active_action": "Action",
    "active_annotation_layer": "GPencilLayer",
    "active_bone": "EditBone",
    "active_editable_fcurve": "FCurve",
    "active_gpencil_frame": "GreasePencilFrame",
    "active_gpencil_layer": "GreasePencilLayer",
    "active_nla_strip": "NlaStrip",
    "active_nla_track": "NlaTrack",
    "active_node": "Node",
    "active_object": "Object",
    "active_operator": "Operator",
    "active_pose_bone": "PoseBone",
    "active_sequence_strip": "Sequence",
    "active_strip": "NlaStrip",
    "annotation_data": "GreasePencil",
    "annotation_data_owner": "ID",
    "edit_object": "Object",
    "editable_bones": "Sequence[EditBone]",
    "editable_gpencil_layers": "Sequence[GPencilLayer]",
    "editable_gpencil_strokes": "Sequence[GPencilStroke]",
    "editable_objects": "Sequence[Object]",
    "gpencil_data": "GreasePencil",
    "gpencil_data_owner": "ID",
    "grease_pencil": "GreasePencil",
    "image_paint_object": "Object",
    "object": "Object",
    "objects_in_mode": "Sequence[Object]",
    "objects_in_mode_unique_data": "Sequence[Object]",
    "particle_edit_object": "Object",
    "pose_object": "Object",
    "property": "str",
    "sculpt_object": "Object",
    "selectable_objects": "Sequence[Object]",
    "selected_bones": "Sequence[EditBone]",
    "selected_editable_actions": "Sequence[Action]",
    "selected_editable_bones": "Sequence[EditBone]",
    "selected_editable_fcurves": "Sequence[FCurve]",
    "selected_editable_keyframes": "Sequence[Keyframe]",
    "selected_editable_objects": "Sequence[Object]",
    "selected_editable_sequences": "Sequence[Sequence]",
    "selected_editable_strips": "Sequence[NlaStrip]",
    "selected_movieclip_tracks": "Sequence[MovieTrackingTrack]",
    "selected_nla_strips": "Sequence[NlaStrip]",
    "selected_objects": "Sequence[Object]",
    "selected_pose_bones": "Sequence[PoseBone]",
    "selected_pose_bones_from_active_object": "Sequence[PoseBone]",
    "selected_sequences": "Sequence[Sequence]",
    "selected_strips": "Sequence[NlaStrip]",
    "selected_visible_actions": "Sequence[Action]",
    "selected_visible_fcurves": "Sequence[FCurve]",
    "sequencer_scene": "Scene",
    "sequences": "Sequence[Sequence]",
    "strips": "Sequence[NlaStrip]",
    "ui_list": "UIList",
    "vertex_paint_object": "Object",
    "visible_bones": "Sequence[EditBone]",
    "visible_fcurves": "Sequence[FCurve]",
    "visible_gpencil_layers": "Sequence[GPencilLayer]",
    "visible_objects": "Sequence[Object]",
    "visible_pose_bones": "Sequence[PoseBone]",
    "weight_paint_object": "Object",
    # Buttons context members (Properties editor panels, not in dir() in headless)
    "armature": "Armature",
    "bone": "Bone",
    "brush": "Brush",
    "camera": "Camera",
    "cloth": "ClothModifier",
    "collision": "CollisionModifier",
    "curve": "Curve",
    "dynamic_paint": "DynamicPaintModifier",
    "edit_bone": "EditBone",
    "fluid": "FluidModifier",
    "hair_curves": "Curves",
    "lattice": "Lattice",
    "light": "Light",
    "lightprobe": "LightProbe",
    "line_style": "FreestyleLineStyle",
    "material": "Material",
    "material_slot": "MaterialSlot",
    "mesh": "Mesh",
    "meta_ball": "MetaBall",
    "node": "Node",
    "particle_settings": "ParticleSettings",
    "particle_system": "ParticleSystem",
    "particle_system_editable": "ParticleSystem",
    "pointcloud": "PointCloud",
    "pose_bone": "PoseBone",
    "soft_body": "SoftBodyModifier",
    "speaker": "Speaker",
    "texture": "Texture",
    "texture_node": "Node",
    "texture_slot": "TextureSlot",
    "texture_user": "ID",
    "texture_user_property": "Property",
    "volume": "Volume",
    "world": "World",
}
# Suffix-based heuristics for screen context member types (order matters: longer first)
SCREEN_CONTEXT_NAME_PATTERNS: list[tuple[str, str]] = [
("_objects", "Sequence[Object]"),
("_object", "Object"),
("_bones", "Sequence[EditBone]"),
("_bone", "EditBone"),
("_fcurves", "Sequence[FCurve]"),
("_fcurve", "FCurve"),
("_strips", "Sequence[NlaStrip]"),
("_strip", "NlaStrip"),
("_actions", "Sequence[Action]"),
("_action", "Action"),
("_track", "NlaTrack"),
("_sequences", "Sequence[Sequence]"),
("_nodes", "Sequence[Node]"),
("_node", "Node"),
]
def infer_context_member_type(name: str) -> str | None:
"""Infer a screen context member's type from its name suffix."""
for suffix, type_str in SCREEN_CONTEXT_NAME_PATTERNS:
if name.endswith(suffix):
return type_str
return None
class ParamData(TypedDict):
    """JSON-serializable record for one function parameter."""

    name: str
    type: str | None  # cleaned annotation string, or None when unknown
    default: str | None  # sanitized repr of the default, or None when required
    kind: str  # inspect.Parameter kind name, e.g. "KEYWORD_ONLY"
class FunctionData(TypedDict):
    """JSON-serializable record for one function or method."""

    name: str
    doc: str  # cleaned docstring (RST directives stripped)
    params: list[ParamData]
    return_type: str | None  # cleaned return annotation, or None when unknown
    is_classmethod: bool
class VariableData(TypedDict):
    """JSON-serializable record for one module-level variable."""

    name: str
    type: str  # annotation string for the runtime value
    value: str  # string form of the value
class PropertyData(TypedDict):
    """JSON-serializable record for one RNA/struct property."""

    name: str
    type: str  # annotation string for the property value
    is_readonly: bool
    description: str
class StructData(TypedDict):
    """JSON-serializable record for one class/struct definition."""

    name: str
    doc: str
    base: str | None  # base class name, or None for root types
    properties: list[PropertyData]
    methods: list[FunctionData]
class ModuleData(TypedDict):
    """JSON-serializable record for one introspected module."""

    module: str  # dotted module name
    doc: str
    functions: list[FunctionData]
    variables: list[VariableData]
    structs: list[StructData]
def parse_docstring_types(docstring: str) -> tuple[dict[str, str], str | None]:
    """Parse RST-style :type: and :rtype: annotations from a docstring.

    Returns (param_types, return_type) where param_types maps param name to
    a cleaned type string (see clean_type_str) and return_type is a cleaned
    type string or None when no :rtype: is present.
    """
    if not docstring:
        return {}, None
    param_types: dict[str, str] = {}
    return_type: str | None = None
    # Match :type param: ... up to the next RST directive (:arg, :type, :rtype, :return)
    # but NOT :class: or :func: which appear inside type annotations
    directive_lookahead = r"(?=\n\s*:(?:arg|param|type|rtype|return|returns)[\s:]|$)"
    for match in re.finditer(
        rf":type\s+(\w+):\s*(.+?){directive_lookahead}", docstring, re.DOTALL
    ):
        name = match.group(1)
        type_str = clean_type_str(match.group(2).strip())
        param_types[name] = type_str
    rtype_match = re.search(
        rf":rtype:\s*(.+?){directive_lookahead}", docstring, re.DOTALL
    )
    if rtype_match:
        return_type = clean_type_str(rtype_match.group(1).strip())
    # Infer Literal types from :arg: descriptions when :type: is just "str".
    # Blender 5.0 and earlier list enum values as ``VALUE`` bullet items in :arg:
    # but only declare :type param: str.
    for name, type_str in param_types.items():
        if type_str != "str":
            continue
        # Find the :arg name: block
        arg_match = re.search(
            rf":arg\s+{re.escape(name)}:\s*(.+?){directive_lookahead}",
            docstring,
            re.DOTALL,
        )
        if not arg_match:
            continue
        arg_text = arg_match.group(1)
        # UPPER_CASE tokens in double backticks are treated as enum values
        values = re.findall(r"``([A-Z][A-Z0-9_]*)``", arg_text)
        if len(values) >= 2:
            quoted = ", ".join(f'"{v}"' for v in values)
            param_types[name] = f"Literal[{quoted}]"
    return param_types, return_type
# Bare type names that appear unqualified in Blender docstrings, mapped to
# their fully qualified module paths (applied near the end of clean_type_str).
UNQUALIFIED_TYPES: dict[str, str] = {
    "Stroke": "freestyle.types.Stroke",
    "ViewEdge": "freestyle.types.ViewEdge",
    "Interface0DIterator": "freestyle.types.Interface0DIterator",
    "UnaryFunction0D": "freestyle.types.UnaryFunction0D",
    "IntegrationType": "freestyle.types.IntegrationType",
    "ImBuf": "imbuf.types.ImBuf",
    "Buffer": "gpu.types.Buffer",
    "GPUShader": "gpu.types.GPUShader",
    "GPUShaderCreateInfo": "gpu.types.GPUShaderCreateInfo",
    "GPUStageInterfaceInfo": "gpu.types.GPUStageInterfaceInfo",
    "GPUBatch": "gpu.types.GPUBatch",
    "GPUTexture": "gpu.types.GPUTexture",
    "GPUFrameBuffer": "gpu.types.GPUFrameBuffer",
    "GPUOffScreen": "gpu.types.GPUOffScreen",
    "GPUVertBuf": "gpu.types.GPUVertBuf",
    "GPUVertFormat": "gpu.types.GPUVertFormat",
    "GPUIndexBuf": "gpu.types.GPUIndexBuf",
    "GPUUniformBuf": "gpu.types.GPUUniformBuf",
    "bpy_struct": "bpy.types.bpy_struct",
    "Context": "bpy.types.Context",
    "BlendData": "bpy.types.BlendData",
    "Mesh": "bpy.types.Mesh",
    "Object": "bpy.types.Object",
    "Depsgraph": "bpy.types.Depsgraph",
    "Scene": "bpy.types.Scene",
    "ViewLayer": "bpy.types.ViewLayer",
    "SpaceView3D": "bpy.types.SpaceView3D",
    "Region": "bpy.types.Region",
    "AdjacencyIterator": "freestyle.types.AdjacencyIterator",
    "ChainingIterator": "freestyle.types.ChainingIterator",
    "BMesh": "bmesh.types.BMesh",
    "BMLayerItem": "bmesh.types.BMLayerItem",
    "BMVert": "bmesh.types.BMVert",
    "BMEdge": "bmesh.types.BMEdge",
    "BMFace": "bmesh.types.BMFace",
    "BMLoop": "bmesh.types.BMLoop",
}
def clean_type_str(type_str: str) -> str:
    """Clean up RST type annotations to plain Python type strings.

    This is a long, strictly order-dependent pipeline of regex rewrites:
    it strips RST markup, converts prose descriptions ("list of floats",
    "sequence of 3 or 4 floats") into real annotations, qualifies bare
    Blender type names, and falls back to "object" whenever the result
    would be malformed (unbalanced brackets, leftover prose).
    """
    # Unwrap :class:`X` role markup to the bare name
    type_str = re.sub(r":class:`([^`]+)`", r"\1", type_str)
    # Remove double backtick RST markup
    type_str = re.sub(r"``([^`]+)``", r"\1", type_str)
    # Strip leaked RST directives from type strings
    type_str = re.sub(r"\.?\s*(?:r?type|returns?):.*", "", type_str)
    # Strip stray RST role colons but not :param, :arg, :type directives
    type_str = re.sub(r":(?!param|arg|type|return)(\w)", r"\1", type_str)
    type_str = type_str.rstrip(":.,")
    # Convert tuple(X, Y) to tuple[X, Y] (docstrings sometimes use parens)
    type_str = re.sub(r"\btuple\(([^)]+)\)", r"tuple[\1]", type_str)
    # Truncate at parameter-like patterns that leaked from function signatures
    # e.g. "Callable[[BMVert], bool] | None, reverse: bool" -> "Callable[[BMVert], bool] | None"
    type_str = re.sub(r",\s+\w+\s*:", "", type_str)

    # Normalize comma-separated types to unions (outside brackets only)
    # "int, float" -> "int | float" but not "tuple[int, float]"
    def replace_commas_outside_brackets(s: str) -> str:
        result: list[str] = []
        depth = 0
        i = 0
        while i < len(s):
            if s[i] in "([":
                depth += 1
                result.append(s[i])
            elif s[i] in ")]":
                depth -= 1
                result.append(s[i])
            elif s[i] == "," and depth == 0:
                result.append(" |")
            else:
                result.append(s[i])
            i += 1
        return "".join(result)

    # Callable types legitimately contain top-level commas; leave them alone
    if "Callable" not in type_str:
        type_str = replace_commas_outside_brackets(type_str)
    # Strip RST directives like ".. note::" and everything after
    type_str = re.sub(r"\.\.\s+\w+::.*", "", type_str, flags=re.DOTALL)
    # Strip trailing prose (sentences after a valid type), but not type keywords like None
    type_str = re.sub(
        r"\s+(?!None\b|True\b|False\b)[A-Z][a-z]+\s+[a-z].*$", "", type_str
    )
    # Strip prose after "or None" / "| None" (e.g. "or None when there is no intersection")
    type_str = re.sub(r"(\bNone)\s+\w.*$", r"\1", type_str)
    # Strip "of size N" suffixes
    type_str = re.sub(r"\s+of size \d+", "", type_str)
    # Strip dimension prefixes like "2d ", "3D ", "4x4 ", "1D or 2D " before type names
    # Must run before "Sequence of" regex to avoid capturing "3d" as a type
    type_str = re.sub(r"\b\d+[dDxX]\d*(?:\s+or\s+\d+[dDxX]\d*)*\s+", "", type_str)
    # "Sequence of Xs containing Ys" -> "Sequence[Sequence[Y]]"
    type_str = re.sub(
        r"\b[Ss]equence of \w+s\s+containing\s+(\w+)s?\b",
        lambda m: f"Sequence[Sequence[{m.group(1)}]]",
        type_str,
    )
    # "Xs containing Y" -> "Sequence[Y]" (standalone, not after "of")
    type_str = re.sub(
        r"\b\w+s\s+containing\s+(\w+)s?\b",
        lambda m: f"Sequence[{m.group(1)}]",
        type_str,
    )
    # Normalize prose-style generic types like "sequence of X", "iterable of X", "collection of X"
    # Optionally skip leading dimension descriptions: "sequence of 3 or 4 floats" -> "Sequence[float]"
    _dim_prefix = r"(?:\d+(?:\s+(?:or|and|to)\s+(?:\d+|more|fewer))*\s+)?"
    type_str = re.sub(
        rf"\b[Ss]equence of {_dim_prefix}(\w[\w.]*)\b",
        lambda m: f"Sequence[{m.group(1)}]",
        type_str,
    )
    type_str = re.sub(
        rf"\b[Ii]terable of {_dim_prefix}(\w[\w.]*)\b",
        lambda m: f"Iterable[{m.group(1)}]",
        type_str,
    )
    type_str = re.sub(
        rf"\b[Cc]ollection of {_dim_prefix}(\w[\w.]*)\b",
        lambda m: f"Collection[{m.group(1)}]",
        type_str,
    )
    # Handle "Sequence of (A, B)" -> "Sequence[tuple[A, B]]"
    type_str = re.sub(
        r"\b[Ss]equence of \(([^)]+)\)",
        lambda m: f"Sequence[tuple[{m.group(1)}]]",
        type_str,
    )
    type_str = re.sub(
        r"\b[Ii]terable of \(([^)]+)\)",
        lambda m: f"Iterable[tuple[{m.group(1)}]]",
        type_str,
    )
    # Strip prose qualifiers like "float triplet" -> "float"
    type_str = re.sub(r"\b(float|int)\s+(triplet|pair|array)\b", r"\1", type_str)
    # Strip number words used as counts (e.g. "four floats" -> "floats")
    type_str = re.sub(
        r"\b(?:one|two|three|four|five|six|seven|eight|nine|ten)\s+",
        "",
        type_str,
    )
    # Handle "tuple of [N] type" -> "tuple[type, ...]"
    type_str = re.sub(
        r"\btuple of (?:\d+ )?([\w.]+\w)\b",
        lambda m: f"tuple[{m.group(1)}, ...]",
        type_str,
    )
    # Handle "list of type" -> "list[type]" (type can be dotted like mathutils.Vector)
    type_str = re.sub(
        r"\blist of ([\w.]+\w)\b",
        lambda m: f"list[{m.group(1)}]",
        type_str,
    )
    # Map "class" -> "type" (used as param type in some docstrings)
    type_str = re.sub(r"\bclass\b", "type", type_str)
    # Map plural/informal type names to proper Python types
    # (plurals first so e.g. "strings" isn't turned into "strs" by the
    # singular rule below)
    type_str = re.sub(r"\bstrings\b", "str", type_str)
    type_str = re.sub(r"\bfloats\b", "float", type_str)
    type_str = re.sub(r"\bints\b", "int", type_str)
    type_str = re.sub(r"\bbools\b", "bool", type_str)
    type_str = re.sub(r"\bnumbers\b", "float", type_str)
    type_str = re.sub(r"\bvectors\b", "mathutils.Vector", type_str)
    type_str = re.sub(r"\bmatrices\b", "mathutils.Matrix", type_str)
    type_str = re.sub(r"\btuples\b", "tuple[object, ...]", type_str)
    type_str = re.sub(r"\bstring\b", "str", type_str)
    type_str = re.sub(r"\bdouble\b", "float", type_str)
    type_str = re.sub(r"\binteger\b", "int", type_str)
    type_str = re.sub(r"\bboolean\b", "bool", type_str)
    type_str = re.sub(r"\bnumber\b", "float", type_str)
    type_str = re.sub(r"\buint\b", "int", type_str)
    # Map NoneType -> None (valid in type annotations)
    type_str = re.sub(r"\bNone[Tt]ype\b", "None", type_str)
    # Map types that don't exist in Python stubs
    type_str = re.sub(r"\bbuffer\b", "object", type_str)
    type_str = re.sub(r"\b[Aa]ny\b", "object", type_str)
    # Map idprop internal types to object (not available in stubs)
    type_str = re.sub(r"\bidprop\.types?\.\w+\b", "object", type_str)
    type_str = re.sub(r"\b(?:bpy\.types\.)?IDProperty\w*\b", "object", type_str)
    # Map bpy_prop and bpy.types.bpy_prop (internal base, not in stubs) to object
    type_str = re.sub(r"\b(?:bpy\.types\.)?bpy_prop\b(?!_)", "object", type_str)
    # Handle "TYPE sequence" -> "Sequence[TYPE]" (e.g. "int sequence" -> "Sequence[int]")
    type_str = re.sub(r"\b(int|float|bool|str)\s+sequence\b", r"Sequence[\1]", type_str)
    # Normalize "X or Y" -> "X | Y"
    type_str = re.sub(r"\s+or\s+", " | ", type_str)
    # Remove pipe-wrapped type macros like |UV_STICKY_SELECT_MODE_TYPE|
    type_str = re.sub(r"\|[A-Z_]+\|", "str", type_str)
    # Map "callable" / "function" -> "Callable[..., object]"
    type_str = re.sub(r"\bcallable\b", "Callable[..., object]", type_str)
    type_str = re.sub(r"\bfunction\b", "Callable[..., object]", type_str)
    # Map lowercase generic names to proper capitalized forms
    type_str = re.sub(r"\bgenerator\b", "Generator", type_str)
    type_str = re.sub(r"\bsequence\b", "Sequence", type_str)
    # Strip numeric type args: "Sequence[3]" -> "Sequence" (becomes bare, parameterized below)
    type_str = re.sub(r"\[\d+\]", "", type_str)
    # Strip numeric-only union parts: "| 2 | 3" from dimension descriptions
    type_str = re.sub(r"\s*\|\s*\d+\b", "", type_str)
    # Empty brackets after a type name (e.g. dict[] from malformed Blender docstrings)
    # -> strip them so bare generic handling kicks in. Don't strip [] inside Callable[[], ...]
    type_str = re.sub(r"(\w)\[\]", r"\1", type_str)
    # Bare generics without params -> add default params.
    # Use \b on both sides to avoid matching inside longer names (e.g. SequenceEntry).
    type_str = re.sub(r"\bCallable\b(?!\[)", "Callable[..., object]", type_str)
    type_str = re.sub(r"\bdict\b(?!\[)", "dict[str, object]", type_str)
    type_str = re.sub(r"\blist\b(?!\[)", "list[object]", type_str)
    type_str = re.sub(r"\btuple\b(?!\[)", "tuple[object, ...]", type_str)
    type_str = re.sub(r"\bset\b(?!\[)", "set[object]", type_str)
    type_str = re.sub(r"\bfrozenset\b(?!\[)", "frozenset[object]", type_str)
    type_str = re.sub(r"\bGenerator\b(?!\[)", "Generator[object, None, None]", type_str)
    type_str = re.sub(r"\bSequence\b(?!\[)", "Sequence[object]", type_str)
    type_str = re.sub(r"\bIterator\b(?!\[)", "Iterator[object]", type_str)
    type_str = re.sub(r"\bIterable\b(?!\[)", "Iterable[object]", type_str)
    # Fix Sequence/list with multiple type args (docstring bug):
    # Sequence[int, int] -> Sequence[tuple[int, int]]
    type_str = re.sub(
        r"\bSequence\[(\w+),\s*(\w+)\]",
        r"Sequence[tuple[\1, \2]]",
        type_str,
    )

    # list[X, Y, ...] with >1 type args -> treat as list[X] (drop extras)
    def _fix_multi_arg_list(m: re.Match[str]) -> str:
        inner = m.group(1)
        # If it contains nested generics like list[float], keep first one
        parts = []
        depth = 0
        current: list[str] = []
        for ch in inner:
            if ch in "([":
                depth += 1
            elif ch in ")]":
                depth -= 1
            if ch == "," and depth == 0:
                parts.append("".join(current).strip())
                current = []
            else:
                current.append(ch)
        parts.append("".join(current).strip())
        if len(parts) <= 1:
            return m.group(0)
        # If all parts are identical, use list[that_type]
        non_ellipsis = [p for p in parts if p != "..."]
        if len(set(non_ellipsis)) == 1:
            return f"list[{non_ellipsis[0]}]"
        # Mixed types -> treat as tuple
        return f"tuple[{', '.join(non_ellipsis)}]"

    # Apply from innermost out, then handle nested brackets; loop until the
    # string stops changing (fixed point)
    prev = ""
    while prev != type_str:
        prev = type_str
        type_str = re.sub(r"\blist\[([^\[\]]+)\]", _fix_multi_arg_list, type_str)
        type_str = re.sub(r"\blist\[(.+)\]", _fix_multi_arg_list, type_str)

    # Fix Literal[X, Y, Z] -> Literal['X', 'Y', 'Z'] (quote bare identifiers)
    def fix_literal(m: re.Match[str]) -> str:
        items = m.group(1)
        quoted = ", ".join(
            f"'{item.strip()}'" if not item.strip().startswith("'") else item.strip()
            for item in items.split(",")
        )
        return f"Literal[{quoted}]"

    type_str = re.sub(r"\bLiteral\[([^\]]+)\]", fix_literal, type_str)
    # Informal mathutils names used in prose (case-insensitive, but not when
    # already qualified with a dot or part of a longer word)
    informal_types: dict[str, str] = {
        "vector": "mathutils.Vector",
        "matrix": "mathutils.Matrix",
        "quaternion": "mathutils.Quaternion",
        "euler": "mathutils.Euler",
        "color": "mathutils.Color",
    }
    for informal, formal in informal_types.items():
        type_str = re.sub(
            rf"(?<!\.)(?<!\w)\b{re.escape(informal)}\b",
            formal,
            type_str,
            flags=re.IGNORECASE,
        )
    # Qualify known unqualified Blender types
    # Map known undefined types to object
    undefined_types = {
        "numpy",
        "bpy_app_translations",
        "BLFImBufContext",
        "AnimateablePropertyP",
        "ModuleType",
        "Undefined",
        "capsule",
        "_translations_type",
        "_PropertyDeferred",
    }
    # Replace types not available in Python 3.11
    type_str = type_str.replace("collections.abc.Buffer", "bytes")
    for undef in undefined_types:
        type_str = re.sub(rf"\b{re.escape(undef)}\b(\.\w+)*", "object", type_str)
    # Map known unqualified Python stdlib types
    type_str = re.sub(r"\bModule\b", "types.ModuleType", type_str)
    # Fix GPU types wrongly referenced as bpy.types.GPU* (Blender docstring bug in 4.x)
    type_str = re.sub(r"\bbpy\.types\.(GPU\w+)\b", r"gpu.types.\1", type_str)
    for bare, qualified in UNQUALIFIED_TYPES.items():
        type_str = re.sub(rf"(?<!\.)(?<!\w)\b{bare}\b", qualified, type_str)
    # Collapse runs of whitespace introduced by earlier substitutions
    type_str = re.sub(r"\s+", " ", type_str).strip()
    # Strip trailing punctuation that leaked from docstrings
    type_str = type_str.rstrip(".,;:")
    # Strip possessive "'s" (e.g. "Vector's" from ":rtype: Vector's")
    type_str = re.sub(r"'s\b", "", type_str)
    # If the type contains hyphens (prose like "per-vector weights"), it's not a type
    if "-" in type_str and not re.match(r"^[\w.\[\], |>()\"']+$", type_str):
        return "object"
    # If brackets are unbalanced, the type is malformed — fall back to object
    if type_str.count("[") != type_str.count("]") or type_str.count(
        "("
    ) != type_str.count(")"):
        return "object"
    # Strip English articles before type names
    type_str = re.sub(r"\b(a|an|the)\s+", "", type_str, flags=re.IGNORECASE)
    # Clean up empty union parts and trailing pipes
    type_str = re.sub(r"\|\s*\|", "|", type_str)
    type_str = re.sub(r"\|\s*$", "", type_str)
    type_str = re.sub(r"^\s*\|", "", type_str)
    # Strip trailing text/prose after types (colon-separated or space-separated)
    type_str = re.sub(r"(\])\s*:.*", r"\1", type_str)
    type_str = re.sub(r"(\])\s+\w.*", r"\1", type_str)
    type_str = re.sub(r"(\w)\s*:\s+\w.*", r"\1", type_str)
    # Map informal numeric types
    type_str = re.sub(r"\breal\b", "float", type_str)

    # Split union on | only outside brackets
    def split_union(s: str) -> list[str]:
        parts: list[str] = []
        current: list[str] = []
        depth = 0
        for ch in s:
            if ch in "([":
                depth += 1
                current.append(ch)
            elif ch in ")]":
                depth -= 1
                current.append(ch)
            elif ch == "|" and depth == 0:
                parts.append("".join(current))
                current = []
            else:
                current.append(ch)
        parts.append("".join(current))
        return parts

    # Final fallback: check each union component for prose (spaces without brackets)
    # Also strip trailing punctuation from each component
    if "|" in type_str:
        parts = [p.strip().rstrip(".,;:") for p in split_union(type_str)]

        def is_valid_type(t: str) -> bool:
            if " " in t and "[" not in t:
                return False
            # snake_case identifiers are variable names, not types
            # (e.g. "sphere_radius" from a buggy docstring)
            if re.match(r"^[a-z][a-z0-9_]+$", t) and "_" in t:
                return False
            # Bare lowercase words that aren't known types (e.g. "four", "sequences")
            if re.match(r"^[a-z]+$", t) and t not in (
                "bool",
                "int",
                "float",
                "str",
                "bytes",
                "object",
                "type",
                "None",
            ):
                return False
            return True

        cleaned = [p if is_valid_type(p) else "object" for p in parts if p]
        # Deduplicate while preserving order
        seen: set[str] = set()
        unique: list[str] = []
        for p in cleaned:
            if p not in seen:
                seen.add(p)
                unique.append(p)
        type_str = " | ".join(unique)
    elif " " in type_str and "[" not in type_str:
        type_str = "object"
    # Standalone invalid types (snake_case variable names, bare lowercase prose words)
    if re.match(r"^[a-z][a-z0-9_]+$", type_str) and "_" in type_str:
        type_str = "object"
    if re.match(r"^[a-z]+$", type_str) and type_str not in (
        "bool",
        "int",
        "float",
        "str",
        "bytes",
        "object",
        "type",
    ):
        type_str = "object"
    # Final balance check — catch any remaining malformed types
    if type_str.count("[") != type_str.count("]") or type_str.count(
        "("
    ) != type_str.count(")"):
        return "object"
    return type_str
def sanitize_default(value: str) -> str:
    """Sanitize a repr'd default value to be valid Python syntax.

    Anything that is not a safe literal — reprs of C objects ("<...>"),
    mutable-container constructors, dict/list literals, attribute chains,
    bare identifiers, or parenthesized ints like "(1)" — collapses to "...".
    """
    # Numeric check used below: "-1.5" passes, "sys.float_info.min" doesn't.
    looks_numeric = value.replace(".", "", 1).lstrip("-").isdigit()
    unsafe = (
        # Reprs of objects without a literal form, e.g. "<Matrix ...>"
        "<" in value
        # Callable/mutable defaults aren't valid literal defaults in stubs
        or value in ("set()", "frozenset()", "dict()", "list()")
        or value[:1] in ("{", "[")
        # Complex dotted expressions (e.g. sys.float_info.min)
        or ("." in value and not looks_numeric)
        # Bare identifiers that aren't Python literals
        # (e.g. "data" from "data=data" in RST signatures)
        or (value.isidentifier() and value not in ("True", "False", "None"))
        # Parenthesized single value like (1) is not a valid tuple literal
        or re.match(r"^\(\d+\)$", value) is not None
    )
    return "..." if unsafe else value
# Types that are C-level descriptors, not valid as type annotations;
# values are runtime type(obj).__name__ strings that map to "object".
C_INTERNAL_TYPES = {
    "getset_descriptor",
    "member_descriptor",
    "method_descriptor",
    "wrapper_descriptor",
    "builtin_function_or_method",
    "_tuplegetter",
    "classmethod_descriptor",
    "_translations_type",
}
def clean_docstring(docstring: str) -> str:
    """Extract the descriptive part of a docstring, removing RST directives and markup.

    Keeps prose up to the first type-annotation directive and drops any
    ".. directive::" blocks together with their indented bodies, plus
    trailing blank lines.
    """
    if not docstring:
        return ""
    stop_markers = (":arg ", ":type ", ":rtype:", ":return:", ":returns:")
    kept: list[str] = []
    in_directive_block = False
    for raw_line in docstring.split("\n"):
        text = raw_line.strip()
        # Everything from the first type-annotation directive on is dropped
        if text.startswith(stop_markers):
            break
        # A ".. foo::" line opens an RST block whose body we skip
        if text.startswith(".. "):
            in_directive_block = True
            continue
        if in_directive_block:
            # Blank or indented lines still belong to the directive block
            if not text or raw_line[0].isspace():
                continue
            in_directive_block = False
        kept.append(raw_line)
    # Trim trailing blank lines
    while kept and not kept[-1].strip():
        kept.pop()
    return "\n".join(kept)
def param_kind_str(kind: int) -> str:
    """Convert inspect parameter kind to a string.

    Raises ValueError for a value that matches no known kind.
    """
    kind_names = {
        inspect.Parameter.POSITIONAL_ONLY: "POSITIONAL_ONLY",
        inspect.Parameter.POSITIONAL_OR_KEYWORD: "POSITIONAL_OR_KEYWORD",
        inspect.Parameter.VAR_POSITIONAL: "VAR_POSITIONAL",
        inspect.Parameter.KEYWORD_ONLY: "KEYWORD_ONLY",
        inspect.Parameter.VAR_KEYWORD: "VAR_KEYWORD",
    }
    name = kind_names.get(kind)
    if name is None:
        msg = f"Unknown parameter kind: {kind}"
        raise ValueError(msg)
    return name
# bpy.props param names whose type is always set[str] (string option enums),
# e.g. options={'HIDDEN'} — used when refining introspected parameter types.
PROP_SET_PARAMS = {"options", "override", "tags", "search_options"}
def refine_types_by_context(
func_name: str,
param_types: dict[str, str],
return_type: str | None,
) -> tuple[dict[str, str], str | None]:
"""Refine imprecise types using function name context.
For example, BoolVectorProperty's 'default' param with bare 'Sequence'
can be refined to 'Sequence[bool]' from the function name.
"""
is_property_func = func_name.endswith("Property")
element_type_map: dict[str, str] = {
"Bool": "bool",
"Float": "float",
"Int": "int",
}
for prefix, element_type in element_type_map.items():
if func_name.startswith(prefix) and "Vector" in func_name:
for pname, ptype in param_types.items():
if pname == "default" and ptype in ("Sequence", "Sequence[object]"):
param_types[pname] = f"Sequence[{element_type}]"
# bpy.props *Property functions: all set types contain string enum values
if is_property_func:
for pname, ptype in param_types.items():
if "set[object]" in ptype:
param_types[pname] = ptype.replace("set[object]", "set[str]")
if return_type in ("Generator", "Generator[object, None, None]"):
return_type = "Generator[str, None, None]"
return param_types, return_type
def parse_rst_function_sig(
    docstring: str,
) -> dict[str, tuple[str | None, str]]:
    """Parse the '.. function:: name(args)' RST directive for defaults and kinds.

    Returns {param_name: (default_value_or_None, kind_str)} in signature
    order, where kind_str is an inspect.Parameter kind name. Returns an
    empty dict when the docstring has no function/method/class directive.
    """
    result: dict[str, tuple[str | None, str]] = {}
    # Find the function signature, handling nested parens in defaults like set()
    match = re.search(r"\.\.\s+(?:function|method|class)::\s+\w+\(", docstring)
    if not match:
        return result
    # Extract content between outermost parens, respecting nesting
    start = match.end()
    depth = 1
    i = start
    while i < len(docstring) and depth > 0:
        if docstring[i] == "(":
            depth += 1
        elif docstring[i] == ")":
            depth -= 1
        i += 1
    sig_str = docstring[start : i - 1]
    # Strip RST optional parameter brackets:
    # "data[, position]" -> "data, position"
    # "[rows]" -> "rows" (all-optional)
    # These indicate optional params in RST, not Python generics.
    # Process from innermost outward to handle nested brackets like "a[, b[, c]]"
    while "[," in sig_str:
        sig_str = re.sub(r"\[,([^\[\]]*)\]", r",\1", sig_str)
    # Handle remaining RST optional brackets: "[param]" or "[param=default]"
    # Only strip brackets that wrap param-like content (identifiers, not types)
    while re.search(r"\[(?!['\"])\w+[^\[\]]*\]", sig_str):
        sig_str = re.sub(r"\[(\w+[^\[\]]*)\]", r"\1", sig_str)
    # Split the signature on top-level commas only — default values may
    # contain their own nested brackets/braces/parens
    parts: list[str] = []
    current: list[str] = []
    depth = 0
    for ch in sig_str:
        if ch in "({[":
            depth += 1
            current.append(ch)
        elif ch in ")}]":
            depth -= 1
            current.append(ch)
        elif ch == "," and depth == 0:
            parts.append("".join(current))
            current = []
        else:
            current.append(ch)
    if current:
        parts.append("".join(current))
    kind = "POSITIONAL_OR_KEYWORD"
    for part in parts:
        part = part.strip()
        if not part:
            continue
        if part == "/":
            # Positional-only separator: mark all preceding params as POSITIONAL_ONLY
            for pname in result:
                result[pname] = (result[pname][0], "POSITIONAL_ONLY")
            continue
        if part == "*":
            # Bare star: everything after is keyword-only
            kind = "KEYWORD_ONLY"
            continue
        if part.startswith("**"):
            param_name = part.lstrip("*").split("=")[0].strip()
            result[param_name] = (None, "VAR_KEYWORD")
            continue
        if part.startswith("*"):
            param_name = part.lstrip("*").split("=")[0].strip()
            result[param_name] = (None, "VAR_POSITIONAL")
            # Params following *args are keyword-only
            kind = "KEYWORD_ONLY"
            continue
        if "=" in part:
            param_name, default = part.split("=", 1)
            result[param_name.strip()] = (sanitize_default(default.strip()), kind)
        else:
            result[part.strip()] = (None, kind)
    return result
def introspect_callable(func: Callable[..., object], name: str) -> FunctionData | None:
    """Introspect a callable (function or builtin) and return its metadata.

    Parameter names/kinds/defaults come from inspect.signature when
    available; types come from the docstring's :type:/:rtype: directives.
    For C extensions without a signature, falls back to parsing the RST
    ".. function::" directive. is_classmethod is always False here.
    """
    docstring = inspect.getdoc(func) or ""
    param_types, return_type = parse_docstring_types(docstring)
    param_types, return_type = refine_types_by_context(name, param_types, return_type)
    try:
        sig = inspect.signature(func)
    # fmt: off
    except (ValueError, TypeError):
        # fmt: on
        # C extension without signature — build params from docstring :type:
        # and extract defaults/kinds from RST ".. function::" directive
        rst_sig = parse_rst_function_sig(docstring)
        params: list[ParamData] = []
        if rst_sig:
            # RST signature has the authoritative param names and order.
            # Match :type: info by name first, then positionally for mismatches.
            # Positional fallback only fires when ALL :type: names are mismatched
            # (i.e. the docstring uses different names than the RST signature).
            rst_names = set(rst_sig.keys())
            any_name_match = bool(rst_names & set(param_types.keys()))
            type_values = list(param_types.values())
            positional_idx = 0
            for rst_name, (default, kind) in rst_sig.items():
                param_type = param_types.get(rst_name)
                if param_type is None and not any_name_match and type_values:
                    # Positional fallback: all :type: names differ from RST names
                    if positional_idx < len(type_values):
                        param_type = type_values[positional_idx]
                        positional_idx += 1
                # A None default implies the type must admit None
                if (
                    default == "None"
                    and param_type
                    and not re.search(r"\| None\b", param_type)
                ):
                    param_type = param_type + " | None"
                params.append(
                    {
                        "name": rst_name,
                        "type": param_type,
                        "default": default,
                        "kind": kind,
                    }
                )
        else:
            # No RST signature — use :type: directives only
            for param_name, param_type in param_types.items():
                params.append(
                    {
                        "name": param_name,
                        "type": param_type,
                        "default": None,
                        "kind": "POSITIONAL_OR_KEYWORD",
                    }
                )
        return {
            "name": name,
            "doc": clean_docstring(docstring),
            "params": params,
            "return_type": return_type,
            "is_classmethod": False,
        }
    # Build positional fallback for param name mismatches:
    # C functions often use generic names like "object" in __text_signature__
    # while docstrings use descriptive names like "string", "cls", etc.
    doc_param_list = list(param_types.items())
    sig_param_list = list(sig.parameters.items())
    params = []
    for i, (pname, param) in enumerate(sig_param_list):
        default: str | None = None
        if param.default is not inspect.Parameter.empty:
            default = sanitize_default(repr(param.default))
        type_str = param_types.get(pname)
        actual_name = pname
        # Positional fallback: use docstring name + type when sig name doesn't match
        if type_str is None and i < len(doc_param_list):
            doc_name, doc_type = doc_param_list[i]
            if doc_name not in sig.parameters:
                type_str = doc_type
                actual_name = doc_name
        # A None default implies the type must admit None
        if default == "None" and type_str and not re.search(r"\| None\b", type_str):
            type_str = type_str + " | None"
        params.append(
            {
                "name": actual_name,
                "type": type_str,
                "default": default,
                "kind": param_kind_str(param.kind),
            }
        )
    return {
        "name": name,
        "doc": clean_docstring(docstring),
        "params": params,
        "return_type": return_type,
        "is_classmethod": False,
    }
# Runtime type names (type(obj).__name__) that need a qualified or generic
# replacement when emitted as annotations for module-level variables.
RUNTIME_TYPE_QUALIFICATIONS: dict[str, str] = {
    "Context": "bpy.types.Context",
    "BlendData": "bpy.types.BlendData",
    "bpy_app_translations": "object",
    "dict": "dict[str, object]",
    "tuple": "tuple[object, ...]",
    "OrderedDict": "collections.OrderedDict[str, object]",
    "Callable": "Callable[..., object]",
    "ShaderWrapper": "object",
}
def python_type_name(obj: object, var_name: str = "") -> str:
    """Return a type-annotation string describing *obj*'s runtime type.

    C-internal types (and classes whose name equals the variable name,
    which would be self-referential in a stub) collapse to ``object``;
    known ambiguous names are qualified via RUNTIME_TYPE_QUALIFICATIONS;
    containers are parameterized by sampling their first element.
    """
    cls_name = type(obj).__name__
    if cls_name in C_INTERNAL_TYPES or cls_name == var_name:
        return "object"
    if cls_name in RUNTIME_TYPE_QUALIFICATIONS:
        return RUNTIME_TYPE_QUALIFICATIONS[cls_name]
    if isinstance(obj, type):
        return f"type[{obj.__name__}]"
    # Parameterize containers by inspecting their runtime contents.
    # The cast keeps basedpyright from inferring set[Unknown]/list[Unknown]
    # out of the isinstance narrowing of `object`.
    if cls_name in ("set", "frozenset", "list"):
        from collections.abc import Iterable

        items = list(cast(Iterable[object], obj))
        if items:
            elem = type(items[0]).__name__
            if elem in C_INTERNAL_TYPES:
                elem = "object"
            else:
                elem = RUNTIME_TYPE_QUALIFICATIONS.get(elem, elem)
        else:
            # Empty set-likes are presumably enum-flag sets of strings;
            # empty lists stay fully opaque.
            elem = "str" if isinstance(obj, (set, frozenset)) else "object"
        return f"{cls_name}[{elem}]"
    return cls_name
def _parse_class_constructor(class_doc: str, cls: type) -> FunctionData | None:
    """Parse a ``.. class:: ClassName(params)`` RST directive into an __init__ method.

    C extension types expose constructor info in their class docstring rather
    than via an inspectable ``__init__``. Returns None if no constructor
    directive is found or if the constructor takes no parameters.
    """
    # Check if this class already has an inspectable __init__ with a real signature
    init = cls.__dict__.get("__init__")
    if init is not None:
        try:
            sig = inspect.signature(init)
            # Has real params beyond just *args/**kwargs → skip RST parsing
            real_params = [
                p
                for p in sig.parameters.values()
                if p.name != "self"
                and p.kind
                not in (
                    inspect.Parameter.VAR_POSITIONAL,
                    inspect.Parameter.VAR_KEYWORD,
                )
            ]
            if real_params:
                return None
        except (ValueError, TypeError):
            # C-level __init__ with no retrievable signature — fall through
            # to RST parsing below.
            pass
    # Look for ".. class:: ClassName(params)" in the docstring
    if not re.search(r"\.\.\s+class::", class_doc):
        return None
    rst_sig = parse_rst_function_sig(class_doc)
    if not rst_sig:
        return None
    param_types, _ = parse_docstring_types(class_doc)
    params: list[ParamData] = []
    # rst_sig maps parameter name -> (default literal, parameter kind)
    for param_name, (default, kind) in rst_sig.items():
        param_type = param_types.get(param_name)
        # A default of None widens the annotation to "T | None" unless the
        # docstring type already includes it.
        if default == "None" and param_type and not re.search(r"\| None\b", param_type):
            param_type = param_type + " | None"
        params.append(
            {
                "name": param_name,
                "type": param_type,
                "default": default,
                "kind": kind,
            }
        )
    if not params:
        return None
    return {
        "name": "__init__",
        "doc": "",
        "params": params,
        "return_type": "None",
        "is_classmethod": False,
    }
def introspect_class(cls: type, module_name: str) -> StructData:
    """Introspect a class (C extension or Python) and return StructData.

    Walks the class's own ``__dict__`` (inherited members are skipped) and
    classifies each public attribute as a class/static/instance method, a
    property/getset descriptor, or a plain data attribute. A constructor
    parsed from the class docstring's RST directive, if any, is prepended
    to the method list.
    """
    # Determine base class (skip object and internal bases)
    bases = [
        b for b in cls.__mro__[1:] if b is not object and b.__module__ != "builtins"
    ]
    base_name: str | None = None
    if bases:
        base_cls = bases[0]
        # Only use the base if it's accessible (in the same module's public API
        # or fully qualified from another module)
        parent_mod = importlib.import_module(base_cls.__module__)
        public = getattr(parent_mod, "__all__", None)
        is_public = public is None or base_cls.__name__ in public
        if is_public:
            if base_cls.__module__ == module_name:
                base_name = base_cls.__name__
            else:
                base_name = f"{base_cls.__module__}.{base_cls.__name__}"
    properties: list[PropertyData] = []
    methods: list[FunctionData] = []
    for name in sorted(dir(cls)):
        if name.startswith("_"):
            continue
        try:
            obj = getattr(cls, name)
        except AttributeError:
            continue
        # Check if this member is defined on this class, not inherited
        if name not in cls.__dict__:
            continue
        # `raw` is the descriptor as stored on the class; `obj` is the
        # already-bound attribute returned by getattr.
        raw = cls.__dict__[name]
        if (
            isinstance(raw, classmethod)
            or type(raw).__name__ == "classmethod_descriptor"
        ):
            # classmethod_descriptor is the C-level counterpart of classmethod
            func_data = introspect_callable(obj, name)
            if func_data:
                func_data["is_classmethod"] = True
                methods.append(func_data)
        elif isinstance(raw, staticmethod):
            func_data = introspect_callable(obj, name)
            if func_data:
                methods.append(func_data)
        elif callable(obj):
            func_data = introspect_callable(obj, name)
            if func_data:
                methods.append(func_data)
        elif isinstance(raw, property) or type(raw).__name__ == "getset_descriptor":
            doc = inspect.getdoc(raw) or ""
            _, rtype = parse_docstring_types(doc)
            # getset_descriptor has no `fset` attribute at all, so it is
            # reported read-only here. NOTE(review): some C getset
            # descriptors are writable at runtime — confirm if that matters.
            is_readonly = not hasattr(raw, "fset") or raw.fset is None
            properties.append(
                {
                    "name": name,
                    "type": rtype or "object",
                    "is_readonly": is_readonly,
                    "description": doc,
                }
            )
        else:
            # Plain data attribute — infer a type from its runtime value.
            properties.append(
                {
                    "name": name,
                    "type": python_type_name(obj, name),
                    "is_readonly": True,
                    "description": "",
                }
            )
    # Generate __init__ from the class docstring's ".. class::" RST directive
    # (C extension types expose constructor info this way, not via __init__)
    class_doc = inspect.getdoc(cls) or ""
    init_method = _parse_class_constructor(class_doc, cls)
    if init_method:
        methods.insert(0, init_method)
    return {
        "name": cls.__name__,
        "doc": class_doc,
        "base": base_name,
        "properties": properties,
        "methods": methods,
    }
def infer_getter_return_types(functions: list[FunctionData]) -> None:
    """Fill in missing ``*_get`` return types from the paired ``*_set`` parameter.

    Many Blender modules (e.g. gpu.state) pair ``foo_set(value)`` with
    ``foo_get()``. When a getter lacks a return type but its setter has a
    single typed parameter, that parameter's type is copied over in place.
    """
    setter_types: dict[str, str] = {}
    for func in functions:
        fname = func["name"]
        if not fname.endswith("_set"):
            continue
        args = func["params"]
        if len(args) != 1:
            continue
        arg_type = args[0].get("type")
        if arg_type:
            setter_types[fname[: -len("_set")]] = arg_type
    for func in functions:
        fname = func["name"]
        if (
            fname.endswith("_get")
            and func["return_type"] is None
            and fname[: -len("_get")] in setter_types
        ):
            func["return_type"] = setter_types[fname[: -len("_get")]]
def introspect_module(module_name: str) -> ModuleData:
    """Introspect a module and return its full metadata as a dict.

    bpy.types is RNA-defined rather than a normal Python module, so it is
    routed to the dedicated RNA walker. For any other module, the public
    names (from ``__all__`` when present, supplemented from ``dir()``)
    are classified as classes, type aliases, callables, or variables.
    """
    if module_name == "bpy.types":
        return introspect_rna_types()
    module = importlib.import_module(module_name)
    # Use __all__ as the base, but also include public callables from dir()
    # that are defined in this module (not imported from elsewhere).
    # This catches functions like is_path_builtin that are in the module
    # but not in __all__.
    all_attr: tuple[str, ...] | None = getattr(module, "__all__", None)
    if all_attr is not None:
        names_set = set(all_attr)
        # Old-style typing re-exports that must NOT be picked up as aliases.
        stdlib_typing_names = {
            "Callable", "Collection", "Generator", "Iterable",
            "Iterator", "Mapping", "MutableMapping", "MutableSequence",
            "MutableSet", "Sequence", "Set", "FrozenSet",
            "Dict", "List", "Tuple", "Type", "Optional", "Union",
        }
        for n in dir(module):
            if n.startswith("_") or n in names_set:
                continue
            obj = getattr(module, n, None)
            if obj is None:
                continue
            # Only add functions/classes defined in this module
            obj_module = getattr(obj, "__module__", None)
            if obj_module == module_name and (callable(obj) or isinstance(obj, type)):
                names_set.add(n)
            # Also add type aliases (e.g. FCurveKey = Tuple[str, int])
            # but not stdlib re-exports (Iterable, Sequence, etc.)
            elif hasattr(obj, "__origin__") and n not in stdlib_typing_names:
                names_set.add(n)
        public_names: list[str] = sorted(names_set)
    else:
        public_names = [n for n in dir(module) if not n.startswith("_")]
    functions: list[FunctionData] = []
    variables: list[VariableData] = []
    structs: list[StructData] = []
    for name in sorted(public_names):
        obj = getattr(module, name, None)
        if obj is None:
            continue
        # Skip submodules — they are introspected as their own entries.
        if isinstance(obj, ModuleType):
            continue
        if isinstance(obj, type):
            structs.append(introspect_class(obj, module_name))
        elif hasattr(obj, "__origin__") or (
            hasattr(obj, "__module__") and getattr(obj, "__module__", "") == "typing"
        ):
            # Type alias (e.g. FCurveKey = Tuple[str, int])
            type_repr = str(obj).replace("typing.", "")
            # Normalize old-style typing generics to PEP 585 in a single pass
            # (Tuple -> tuple, List -> list, Dict -> dict, Set -> set,
            # FrozenSet -> frozenset).
            type_repr = re.sub(
                r"\b(Tuple|List|Dict|Set|FrozenSet)\b",
                lambda m: m.group(0).lower(),
                type_repr,
            )
            variables.append(
                {
                    "name": name,
                    # Fixed: was f"TypeAlias", an f-string with no placeholders.
                    "type": "TypeAlias",
                    "value": type_repr,
                }
            )
        elif callable(obj):
            func_data = introspect_callable(obj, name)
            if func_data:
                functions.append(func_data)
        else:
            variables.append(
                {
                    "name": name,
                    "type": python_type_name(obj, name),
                    "value": repr(obj),
                }
            )
    infer_getter_return_types(functions)
    return {
        "module": module_name,
        "doc": inspect.getdoc(module) or "",
        "functions": functions,
        "variables": variables,
        "structs": structs,
    }
def _try_import_or_attr(module_name: str) -> bool:
"""Try to import a module, falling back to attribute lookup on parent.
Returns True if the module is now accessible via importlib.
"""
try:
importlib.import_module(module_name)
return True
except ImportError:
pass
# Fallback: access the submodule via attribute lookup on the parent
# and register it in sys.modules so importlib works later.
# This is needed for C-level submodules in older Blender versions (< 4.1).
parts = module_name.split(".")
try:
parent = importlib.import_module(parts[0])
obj: object = parent
for part in parts[1:]:
obj = getattr(obj, part)
if isinstance(obj, ModuleType):
sys.modules[module_name] = obj
return True
except (ImportError, AttributeError):
pass
return False
def _discover_submodules_via_dir(mod: ModuleType, parent_name: str) -> list[str]:
"""Discover C-level submodules by inspecting dir() for ModuleType attributes.
pkgutil.walk_packages only works for filesystem-backed packages with __path__.
Many Blender modules (gpu.state, bpy.app.handlers, etc.) are C-level and
only discoverable via attribute access.
"""
found: list[str] = []
for attr_name in dir(mod):
if attr_name.startswith("_"):
continue
obj = getattr(mod, attr_name, None)
if isinstance(obj, ModuleType):
submodule_name = f"{parent_name}.{attr_name}"
# Verify the module actually belongs to this parent
# (filter out stray re-exports like 'sys', 'os', etc.)
obj_name = getattr(obj, "__name__", "")
if obj_name == submodule_name or obj_name.startswith(parent_name + "."):
found.append(submodule_name)
return found
def discover_modules() -> list[str]:
    """Discover all Blender Python modules and submodules."""
    ordered: list[str] = []
    seen: set[str] = set()

    def _record(name: str) -> bool:
        """Append *name* once, preserving discovery order; True if new."""
        if name in seen:
            return False
        seen.add(name)
        ordered.append(name)
        return True

    for top_name in BLENDER_MODULES:
        try:
            top_mod = importlib.import_module(top_name)
        except ImportError:
            print(f"  Skipping {top_name} (import failed)", file=sys.stderr)
            continue
        _record(top_name)
        # Filesystem-backed packages: walk them with pkgutil.
        if hasattr(top_mod, "__path__"):
            for _importer, subname, _ispkg in pkgutil.walk_packages(
                top_mod.__path__, prefix=top_name + "."
            ):
                try:
                    importlib.import_module(subname)
                except ImportError:
                    print(f"  Skipping {subname} (import failed)", file=sys.stderr)
                else:
                    _record(subname)
        # C-level submodules are only visible via dir() attribute inspection.
        for subname in _discover_submodules_via_dir(top_mod, top_name):
            if _try_import_or_attr(subname) and _record(subname):
                # Recurse one level for nested submodules (e.g. bpy.app.handlers)
                nested_parent = importlib.import_module(subname)
                for nested in _discover_submodules_via_dir(nested_parent, subname):
                    if _try_import_or_attr(nested):
                        _record(nested)
    # Add hardcoded extra modules that can't be discovered via dir() either
    # (e.g. modules only accessible after explicit import in some versions)
    for extra in EXTRA_MODULES:
        if extra not in seen and _try_import_or_attr(extra):
            _record(extra)
    return ordered
# --- RNA introspection (bpy.types) ---
# Primitive RNA property kinds and their Python equivalents.
RNA_TYPE_MAP: dict[str, str] = {
    "boolean": "bool",
    "int": "int",
    "float": "float",
    "string": "str",
    "enum": "str",
}


def rna_property_to_type(prop: object) -> str:
    """Translate an RNA property description into a PEP 484 annotation string."""
    kind: str = getattr(prop, "type", "")
    fixed: object = getattr(prop, "fixed_type", None)
    length: int = getattr(prop, "array_length", 0)
    if fixed is not None:
        if kind == "pointer":
            return getattr(fixed, "identifier", "object")
        if kind == "collection":
            # Prefer the concrete wrapper class (e.g. BlendDataImages) exposed
            # via srna over the generic bpy_prop_collection[T] — the wrapper
            # keeps collection-specific methods like new() and remove().
            srna: object = getattr(prop, "srna", None)
            wrapper: str = "" if srna is None else getattr(srna, "identifier", "")
            if wrapper:
                return wrapper
            element: str = getattr(fixed, "identifier", "object")
            return f"bpy_prop_collection[{element}]"
    # rna_info wraps the raw RNA property as bl_prop; dynamic-length arrays
    # report array_length == 0 but is_array == True on that raw property.
    raw_prop: object = getattr(prop, "bl_prop", prop)
    dynamic: bool = getattr(raw_prop, "is_array", False)
    if kind in ("float", "int", "boolean") and (length > 0 or dynamic):
        base = RNA_TYPE_MAP.get(kind, kind)
        if length == 0 and dynamic:
            # Dynamic-length array — at runtime this is a bpy_prop_array
            return f"bpy_prop_array[{base}]"
        return f"list[{base}]"
    return RNA_TYPE_MAP.get(kind, kind)
def rna_function_to_data(func_info: object) -> FunctionData:
    """Build a FunctionData record from an rna_info function description."""
    args_list: list[object] = getattr(func_info, "args", [])
    return_values: tuple[object, ...] = getattr(func_info, "return_values", ())
    is_classmethod: bool = getattr(func_info, "is_classmethod", False)
    params: list[ParamData] = []
    if is_classmethod:
        # RNA does not list the implicit cls parameter; add it explicitly.
        params.append(
            {
                "name": "cls",
                "type": None,
                "default": None,
                "kind": "POSITIONAL_OR_KEYWORD",
            }
        )
    for arg in args_list:
        arg_type = rna_property_to_type(arg)
        default_val: str | None = None
        if not getattr(arg, "is_required", False):
            # Optional RNA arguments become "T | None = None".
            arg_type = f"{arg_type} | None"
            default_val = "None"
        params.append(
            {
                "name": getattr(arg, "identifier", ""),
                "type": arg_type,
                "default": default_val,
                "kind": "POSITIONAL_OR_KEYWORD",
            }
        )
    return_type: str | None = None
    if len(return_values) == 1:
        return_type = rna_property_to_type(return_values[0])
    elif return_values:
        # Multiple return values map onto a tuple annotation.
        joined = ", ".join(rna_property_to_type(rv) for rv in return_values)
        return_type = f"tuple[{joined}]"
    return {
        "name": getattr(func_info, "identifier", ""),
        "doc": getattr(func_info, "description", ""),
        "params": params,
        "return_type": return_type,
        "is_classmethod": is_classmethod,
    }
def _import_rna_info() -> ModuleType:
"""Import the rna_info module, handling different Blender versions."""
try:
return importlib.import_module("_rna_info")
except ImportError:
return importlib.import_module("rna_info")
def _infer_type_from_runtime_value(value: object) -> str | None:
"""Infer a type string from a runtime bpy.context attribute value."""
type_name = type(value).__name__
# Single RNA objects — use the class name directly
if hasattr(type(value), "bl_rna"):
return type_name
if isinstance(value, list):
contents = cast(list[object], value)
if contents:
elem_type = type(contents[0])
if hasattr(elem_type, "bl_rna"):
return f"Sequence[{elem_type.__name__}]"
return None
if isinstance(value, str):
return "str"
if isinstance(value, bool):
return "bool"
if isinstance(value, int):
return "int"
if isinstance(value, float):
return "float"
return None
def introspect_screen_context_members(
    rna_property_names: set[str],
) -> list[PropertyData]:
    """Discover screen context members from bpy.context that aren't in RNA.

    These are dynamically injected by Blender based on the active editor/mode.
    All are typed as T | None since they're context-dependent.
    This function only runs inside Blender's Python environment.
    """
    bpy = importlib.import_module("bpy")
    ctx: object = getattr(bpy, "context")
    skip = {"bl_rna", "id_data", "rna_type"}
    # Keep attributes that are not RNA-declared and not methods; list/tuple
    # values are kept even though they are technically callable-adjacent.
    extra_attrs = sorted(
        name
        for name in dir(ctx)
        if not name.startswith("_")
        and name not in rna_property_names
        and name not in skip
        and not (
            callable(getattr(ctx, name))
            and not isinstance(getattr(ctx, name), (list, tuple))
        )
    )
    properties: list[PropertyData] = []
    for name in extra_attrs:
        try:
            value = getattr(ctx, name)
        except AttributeError:
            continue
        type_str: str | None = None
        # Tier 1: runtime inspection (non-None values)
        if value is not None:
            type_str = _infer_type_from_runtime_value(value)
        # Tier 2: hardcoded override
        if type_str is None:
            type_str = SCREEN_CONTEXT_TYPE_OVERRIDES.get(name)
        # Tier 3: name-pattern heuristic
        if type_str is None:
            type_str = infer_context_member_type(name)
        # Final fallback
        if type_str is None:
            type_str = "object"
        # Sequence/collection types are never None — they return empty sequences.
        # Only singular object references (active_object, etc.) can be None.
        is_collection = type_str.startswith("Sequence[") or type_str.startswith(
            "bpy_prop_collection["
        )
        final_type = type_str if is_collection else f"{type_str} | None"
        properties.append(
            {
                "name": name,
                "type": final_type,
                "is_readonly": True,
                "description": "",
            }
        )
    # Also inject overrides not found in dir() (e.g. buttons context members
    # like meta_ball, mesh, armature that require active UI panels)
    discovered = {p["name"] for p in properties}
    for name, type_str in sorted(SCREEN_CONTEXT_TYPE_OVERRIDES.items()):
        if name not in discovered and name not in rna_property_names:
            is_collection = type_str.startswith("Sequence[") or type_str.startswith(
                "bpy_prop_collection["
            )
            final_type = type_str if is_collection else f"{type_str} | None"
            properties.append(
                {
                    "name": name,
                    "type": final_type,
                    "is_readonly": True,
                    "description": "",
                }
            )
    return properties
def _validate_context_prop_type(type_str: str, known_types: set[str]) -> str:
"""Replace type references that don't exist in this version with 'object'."""
import re as _re
def _replace_match(match: re.Match[str]) -> str:
name = match.group(1)
if name == "None":
return name
# "X[" is a generic usage (e.g. Sequence[...]) — keep it
end = match.end()
if end < len(type_str) and type_str[end] == "[":
return name
if name not in known_types:
return "object"
return name
return _re.sub(r"\b([A-Z]\w+)\b", _replace_match, type_str)
def introspect_rna_types() -> ModuleData:
    """Introspect all RNA-defined types using rna_info.BuildRNAInfo().

    Emits the C-level base classes (bpy_struct, bpy_prop_collection,
    bpy_prop_array) first, then every RNA struct. Collection wrapper
    classes are re-based onto ``bpy_prop_collection[Element]``, and the
    Context struct is augmented with dynamically injected screen context
    members.
    """
    rna_info = _import_rna_info()
    info = rna_info.BuildRNAInfo()
    # BuildRNAInfo returns a tuple; the first element maps identifiers to
    # struct info objects.
    structs_dict = info[0]
    # Introspect the C-level base classes that aren't in RNA but are in bpy.types.
    # These provide fundamental methods like __getitem__, foreach_get, etc.
    _bpy_types = importlib.import_module("bpy.types")
    # Generic base classes need manual type parameter annotation since
    # introspection can't discover Python generics from C types.
    _GENERIC_BASES: dict[str, str] = {
        "bpy_prop_collection": "Generic[_T]",
        "bpy_prop_array": "Generic[_T]",
    }
    # Dunder methods for generic types can't be discovered from runtime since
    # they need generic type parameters (_T). Define them explicitly.
    _COLLECTION_DUNDERS: list[FunctionData] = [
        {
            "name": "__getitem__",
            "doc": "",
            "params": [
                {
                    "name": "key",
                    "type": "int | str",
                    "default": None,
                    "kind": "POSITIONAL_OR_KEYWORD",
                }
            ],
            "return_type": "_T",
            "is_classmethod": False,
        },
        {
            "name": "__iter__",
            "doc": "",
            "params": [],
            "return_type": "Iterator[_T]",
            "is_classmethod": False,
        },
        {
            "name": "__len__",
            "doc": "",
            "params": [],
            "return_type": "int",
            "is_classmethod": False,
        },
        {
            "name": "__contains__",
            "doc": "",
            "params": [
                {
                    "name": "key",
                    "type": "str",
                    "default": None,
                    "kind": "POSITIONAL_OR_KEYWORD",
                }
            ],
            "return_type": "bool",
            "is_classmethod": False,
        },
    ]
    _ARRAY_DUNDERS: list[FunctionData] = [
        {
            "name": "__getitem__",
            "doc": "",
            "params": [
                {
                    "name": "index",
                    "type": "int",
                    "default": None,
                    "kind": "POSITIONAL_OR_KEYWORD",
                }
            ],
            "return_type": "_T",
            "is_classmethod": False,
        },
        {
            "name": "__setitem__",
            "doc": "",
            "params": [
                {
                    "name": "index",
                    "type": "int",
                    "default": None,
                    "kind": "POSITIONAL_OR_KEYWORD",
                },
                {
                    "name": "value",
                    "type": "_T",
                    "default": None,
                    "kind": "POSITIONAL_OR_KEYWORD",
                },
            ],
            "return_type": "None",
            "is_classmethod": False,
        },
        {
            "name": "__delitem__",
            "doc": "",
            "params": [
                {
                    "name": "index",
                    "type": "int | slice",
                    "default": None,
                    "kind": "POSITIONAL_OR_KEYWORD",
                }
            ],
            "return_type": "None",
            "is_classmethod": False,
        },
        {
            "name": "__iter__",
            "doc": "",
            "params": [],
            "return_type": "Iterator[_T]",
            "is_classmethod": False,
        },
        {
            "name": "__len__",
            "doc": "",
            "params": [],
            "return_type": "int",
            "is_classmethod": False,
        },
        {
            "name": "__contains__",
            "doc": "",
            "params": [
                {
                    "name": "value",
                    "type": "_T",
                    "default": None,
                    "kind": "POSITIONAL_OR_KEYWORD",
                }
            ],
            "return_type": "bool",
            "is_classmethod": False,
        },
    ]
    _EXTRA_DUNDERS: dict[str, list[FunctionData]] = {
        "bpy_prop_collection": _COLLECTION_DUNDERS,
        "bpy_prop_array": _ARRAY_DUNDERS,
    }
    structs: list[StructData] = []
    # Introspect bpy_struct (base of all RNA types)
    bpy_struct_cls = getattr(_bpy_types, "bpy_struct", None)
    if bpy_struct_cls is not None:
        structs.append(introspect_class(bpy_struct_cls, "bpy.types"))
    for cls_name in ["bpy_prop_collection", "bpy_prop_array"]:
        cls = getattr(_bpy_types, cls_name, None)
        if cls is not None:
            struct = introspect_class(cls, "bpy.types")
            struct["base"] = _GENERIC_BASES[cls_name]
            struct["methods"] = _EXTRA_DUNDERS[cls_name] + struct["methods"]
            structs.append(struct)
    # Build a map of collection wrapper class -> element type.
    # When a property has type=collection and an srna, the srna identifies
    # the wrapper class (e.g. BlendDataImages) and fixed_type is the element
    # (e.g. Image). These wrappers should inherit from bpy_prop_collection[T].
    collection_element_types: dict[str, str] = {}
    for struct_info in structs_dict.values():
        for prop in struct_info.properties:
            if prop.type != "collection" or not prop.fixed_type:
                continue
            srna: object = getattr(prop, "srna", None)
            if srna is not None:
                srna_id: str = getattr(srna, "identifier", "")
                if srna_id:
                    collection_element_types[srna_id] = prop.fixed_type.identifier
    for struct_info in sorted(structs_dict.values(), key=lambda s: s.identifier):
        base_name: str | None = None
        sid = struct_info.identifier
        if sid in collection_element_types:
            # Collection wrapper class — inherit from bpy_prop_collection[T]
            base_name = f"bpy_prop_collection[{collection_element_types[sid]}]"
        elif struct_info.base:
            base_name = struct_info.base.identifier
        else:
            # All RNA types implicitly inherit from bpy_struct
            base_name = "bpy_struct"
        properties: list[PropertyData] = []
        for prop in struct_info.properties:
            properties.append(
                {
                    "name": prop.identifier,
                    "type": rna_property_to_type(prop),
                    "is_readonly": prop.is_readonly,
                    "description": prop.description or "",
                }
            )
        methods: list[FunctionData] = []
        is_collection_wrapper = sid in collection_element_types
        for func_info in struct_info.functions:
            # Skip RNA methods that would incompatibly override bpy_prop_collection
            if is_collection_wrapper and func_info.identifier in ("find", "get"):
                continue
            methods.append(rna_function_to_data(func_info))
        structs.append(
            {
                "name": struct_info.identifier,
                "doc": struct_info.description or "",
                "base": base_name,
                "properties": properties,
                "methods": methods,
            }
        )
    # Add screen context members to the Context struct
    known_types = {s["name"] for s in structs}
    for struct in structs:
        if struct["name"] == "Context":
            rna_names = {p["name"] for p in struct["properties"]}
            rna_names |= {m["name"] for m in struct["methods"]}
            screen_props = introspect_screen_context_members(rna_names)
            # Validate type references and qualify Sequence to avoid
            # shadowing by bpy.types.Sequence (video sequencer strip)
            for prop in screen_props:
                prop["type"] = _validate_context_prop_type(prop["type"], known_types)
                prop["type"] = prop["type"].replace(
                    "Sequence[", "collections.abc.Sequence["
                )
            struct["properties"].extend(screen_props)
            break
    return {
        "module": "bpy.types",
        "doc": "Blender RNA type definitions.",
        "functions": [],
        "variables": [],
        "structs": structs,
    }
@dataclass
class IntrospectArgs:
    """Parsed command-line arguments for this script."""

    # Path of the JSON output file; None means "write to stdout".
    output: str | None = None
def main() -> None:
    """Entry point: introspect every discovered module and emit JSON."""
    # Blender forwards script-specific args after a literal "--" separator;
    # without one there are no args for us.
    try:
        marker = sys.argv.index("--")
        script_argv = sys.argv[marker + 1 :]
    except ValueError:
        script_argv = []
    parser = argparse.ArgumentParser(description="Introspect Blender Python modules")
    parser.add_argument(
        "--output", default=None, help="Output JSON file (default: stdout)"
    )
    parsed = parser.parse_args(script_argv)
    args = IntrospectArgs(output=parsed.output)
    print("Discovering modules...", file=sys.stderr)
    module_names = discover_modules()
    print(f"Found {len(module_names)} modules", file=sys.stderr)
    results: list[ModuleData] = []
    for module_name in module_names:
        print(f"  Introspecting {module_name}...", file=sys.stderr)
        results.append(introspect_module(module_name))
    payload = json.dumps(results, indent=2)
    if args.output:
        with open(args.output, "w") as f:
            f.write(payload)
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        # Sentinel markers let the caller extract the JSON from mixed
        # Blender stdout.
        print("__INTROSPECT_JSON_START__")
        print(payload)
        print("__INTROSPECT_JSON_END__")


if __name__ == "__main__":
    main()