In Python how can one tell if a module comes from a C extension?

后端 未结 4 960
清酒与你
清酒与你 2020-12-15 09:12

What is the correct or most robust way to tell from Python if an imported module comes from a C extension as opposed to a pure Python module? This is useful, for example, i

4条回答
  •  囚心锁ツ
    2020-12-15 09:51

    @Cecil Curry's function is excellent. Two minor comments: firstly, the _elementtree example raises a TypeError with my copy of Python 3.5.6. Secondly, as @crld points out, it's also helpful to know if a module contains C extensions, but a more portable version might help. More generic versions (with Python 3.6+ f-string syntax) may therefore be:

    from importlib.machinery import ExtensionFileLoader, EXTENSION_SUFFIXES
    import inspect
    import logging
    import os
    import os.path
    import pkgutil
    from types import ModuleType
    from typing import List
    
    # Module-level logger named after this module; the functions below report
    # their progress and failures through it.
    log = logging.getLogger(__name__)
    
    
    def is_builtin_module(module: ModuleType) -> bool:
        """
        Is this module a built-in module, like ``sys``?

        Method is as per :func:`inspect.getfile`, which treats a module as
        built-in when it has no usable ``__file__``: the attribute is either
        missing or falsy (for instance ``None``).

        Args:
            module: module object to be tested.

        Returns:
            bool: ``True`` if the module has no file path associated with it.
        """
        # hasattr() alone is not enough: a ``__file__`` attribute that exists
        # but is None would pass that test, and inspect.getfile() would then
        # raise TypeError for the very same module.
        return not getattr(module, "__file__", None)
    
    
    def is_module_a_package(module: ModuleType) -> bool:
        """
        Is this module a package, i.e. was it loaded from an ``__init__.py``
        file?

        Modules without a usable ``__file__`` (built-in modules, or modules
        whose ``__file__`` is missing or ``None``) have no ``__init__.py`` and
        are reported as not being a package, rather than raising the
        ``TypeError`` that :func:`inspect.getfile` produces for them.

        Args:
            module: module object to be tested.

        Returns:
            bool: ``True`` only if the module's file is named ``__init__.py``.
        """
        assert inspect.ismodule(module)
        # For a module, inspect.getfile() simply returns a truthy __file__ and
        # raises otherwise; reading the attribute directly avoids the raise.
        module_filename = getattr(module, "__file__", None)
        if not module_filename:
            return False
        return os.path.basename(module_filename) == "__init__.py"
    
    
    def is_c_extension(module: ModuleType) -> bool:
        """
        Modified from
        https://stackoverflow.com/questions/20339053/in-python-how-can-one-tell-if-a-module-comes-from-a-c-extension.

        ``True`` only if the passed module is a C extension implemented as a
        dynamically linked shared library specific to the current platform.

        Modules without a usable ``__file__`` (built-in modules, or modules
        whose ``__file__`` is missing or ``None``) are never reported as C
        extensions.

        Args:
            module: Previously imported module object to be tested.

        Returns:
            bool: ``True`` only if this module is a C extension.

        Examples:

        .. code-block:: python

            from cardinal_pythonlib.modules import is_c_extension

            import os
            import _elementtree as et
            import numpy
            import numpy.core.multiarray as numpy_multiarray

            is_c_extension(os)  # False
            is_c_extension(numpy)  # False
            is_c_extension(et)  # False on my system (Python 3.5.6). True in the original example.
            is_c_extension(numpy_multiarray)  # True

        """  # noqa
        assert inspect.ismodule(module), f'"{module}" not a module.'

        # If this module was loaded by a PEP 302-compliant CPython-specific loader
        # loading only C extensions, this module is a C extension.
        if isinstance(getattr(module, '__loader__', None), ExtensionFileLoader):
            return True

        # No file at all (attribute missing, None or empty): the module is
        # built in or otherwise not backed by a shared library on disk. Testing
        # for a falsy value, not just a missing attribute, avoids the TypeError
        # that inspect.getfile() raises when ``__file__`` is None.
        module_filename = getattr(module, '__file__', None)
        if not module_filename:
            return False

        # Else, fall back to filename matching heuristics: this module is only
        # a C extension if its path ends with one of the extension-module
        # suffixes of the current platform. Matching the whole suffix (rather
        # than only the last dotted component) also recognises tagged suffixes
        # without relying on a bare short suffix being in the list.
        return module_filename.endswith(tuple(EXTENSION_SUFFIXES))
    
    
    def contains_c_extension(module: ModuleType,
                             import_all_submodules: bool = True,
                             include_external_imports: bool = False,
                             seen: List[ModuleType] = None,
                             verbose: bool = False) -> bool:
        """
        Extends :func:`is_c_extension` by asking: is this module, or any of its
        submodules, a C extension?

        Two strategies are used. First, every attribute of the module that is
        itself a module is checked recursively. Second (only when
        ``import_all_submodules`` is set and the module is a package), the
        package directory is walked with :func:`pkgutil.walk_packages`; each
        sub-package found is imported under its fully qualified name and then
        checked in the same way.

        Args:
            module: Previously imported module object to be tested.
            import_all_submodules: explicitly import all submodules of this module?
            include_external_imports: check modules in other packages that this
                module imports?
            seen: used internally for recursion (to deal with recursive modules);
                should be ``None`` when called by users
            verbose: show working via log?

        Returns:
            bool: ``True`` only if this module or one of its submodules is a C
            extension.

        Raises:
            Exception: any error raised while walking the package directory is
                logged and then re-raised unchanged.

        Examples:

        .. code-block:: python

            import logging

            import _elementtree as et
            import os

            import arrow
            import alembic
            import django
            import numpy
            import numpy.core.multiarray as numpy_multiarray

            log = logging.getLogger(__name__)
            logging.basicConfig(level=logging.DEBUG)  # be verbose

            contains_c_extension(os)  # False
            contains_c_extension(et)  # False

            contains_c_extension(numpy)  # True -- different from is_c_extension()
            contains_c_extension(numpy_multiarray)  # True

            contains_c_extension(arrow)  # False

            contains_c_extension(alembic)  # False
            contains_c_extension(alembic, include_external_imports=True)  # True
            # ... this example shows that Alembic imports hashlib, which can import
            #     _hashlib, which is a C extension; however, that doesn't stop us (for
            #     example) installing Alembic on a machine with no C compiler

            contains_c_extension(django)

        """  # noqa
        # Imported locally: the file-level imports only pull names out of
        # importlib.machinery and do not bind ``importlib`` itself.
        import importlib

        assert inspect.ismodule(module), f'"{module}" not a module.'

        if seen is None:  # only true for the top-level call
            seen = []  # type: List[ModuleType]
        if module in seen:  # modules can "contain" themselves
            # already inspected; avoid infinite loops
            return False
        seen.append(module)

        # Check the thing we were asked about
        is_c_ext = is_c_extension(module)
        if verbose:
            log.info(f"Is module {module!r} a C extension? {is_c_ext}")
        if is_c_ext:
            return True
        if is_builtin_module(module):
            # built-in, therefore we stop searching it
            return False

        # Now check any children, in a couple of ways

        top_level_module = seen[0]
        top_file = getattr(top_level_module, "__file__", None)
        if not top_file:
            # ``__file__`` exists but is None/empty: there is no directory to
            # compare against or to walk.
            return False
        top_path = os.path.dirname(top_file)

        # Recurse using dir(). This picks up modules that are automatically
        # imported by our top-level model. But it won't pick up all submodules;
        # try e.g. for django.
        for candidate_name in dir(module):
            # noinspection PyBroadException
            try:
                # Both the attribute lookup and the ismodule() test can run
                # arbitrary code (lazy objects, module-level __getattr__), so
                # both are guarded.
                candidate = getattr(module, candidate_name)
                if not inspect.ismodule(candidate):
                    # not a module
                    continue
            except Exception:
                # e.g. a Django module that won't import until we configure its
                # settings. Only the attribute name is logged, because the
                # object itself may be unavailable or unsafe to format.
                log.error(f"Failed to test ismodule() status of "
                          f"{candidate_name!r} in {module!r}")
                continue
            if is_builtin_module(candidate):
                # built-in, therefore we stop searching it
                continue

            candidate_fname = getattr(candidate, "__file__", None)
            if not candidate_fname:
                # No file on disk: nothing to locate and nothing to inspect.
                continue
            if not include_external_imports:
                try:
                    is_internal = (
                        os.path.commonpath([top_path, candidate_fname])
                        == top_path
                    )
                except ValueError:
                    # commonpath() refuses to compare absolute with relative
                    # paths, or paths on different drives; such a candidate
                    # cannot live under the top-level directory.
                    is_internal = False
                if not is_internal:
                    if verbose:
                        log.debug(f"Skipping, not within the top-level module's "
                                  f"directory: {candidate!r}")
                    continue
            # Recurse:
            if contains_c_extension(
                    module=candidate,
                    import_all_submodules=False,  # only done at the top level, below  # noqa
                    include_external_imports=include_external_imports,
                    seen=seen,
                    verbose=verbose):
                return True

        if import_all_submodules:
            if not is_module_a_package(module):
                if verbose:
                    log.debug(f"Top-level module is not a package: {module!r}")
                return False

            def _on_walk_error(failed_name: str) -> None:
                # walk_packages() imports each package itself in order to
                # descend into it; a failure there has normally already been
                # reported by the import attempt in the loop below.
                log.debug(f"Cannot descend into package: {failed_name!r}")

            # Otherwise, for things like Django, we need to recurse in a different
            # way to scan everything.
            # See https://stackoverflow.com/questions/3365740/how-to-import-all-submodules.  # noqa
            log.debug(f"Walking path: {top_path!r}")
            # Qualify the yielded names ("pkg.sub", not "sub") so sub-packages
            # are imported as what they really are; bare names break relative
            # imports and register duplicate modules in sys.modules.
            name_prefix = top_level_module.__name__ + "."
            try:
                for _finder, module_name, is_pkg in pkgutil.walk_packages(
                        [top_path], name_prefix, _on_walk_error):
                    if not is_pkg:
                        log.debug(f"Skipping, not a package: {module_name!r}")
                        continue
                    log.debug(f"Manually importing: {module_name!r}")
                    # noinspection PyBroadException
                    try:
                        # importlib.import_module() replaces the finder's
                        # find_module()/load_module() pair, which no longer
                        # exists on file finders from Python 3.12 onwards.
                        candidate = importlib.import_module(module_name)
                    except Exception:
                        # e.g. Django
                        # "django.core.exceptions.ImproperlyConfigured"
                        log.error(f"Package failed to import: {module_name!r}")
                        continue
                    if contains_c_extension(
                            module=candidate,
                            import_all_submodules=False,  # only done at the top level  # noqa
                            include_external_imports=include_external_imports,
                            seen=seen,
                            verbose=verbose):
                        return True
            except Exception:
                log.error("Unable to walk packages further; no C extensions "
                          "detected so far!")
                raise

        return False
    
    
    # noinspection PyUnresolvedReferences,PyTypeChecker
    def test() -> None:
        """
        Run the detection functions against a handful of real modules and log
        each outcome at INFO level.

        The modules are imported here, inside the function, so that
        ``_elementtree``, ``arrow``, ``alembic``, ``django`` and ``numpy`` are
        only needed when this demonstration is actually run. The expected
        answer for each call is given in the trailing comment.
        """
        import _elementtree as et

        import arrow
        import alembic
        import django
        import django.conf
        import numpy
        import numpy.core.multiarray as numpy_multiarray

        os_result = contains_c_extension(os)  # False
        log.info(f"contains_c_extension(os): {os_result}")
        et_result = contains_c_extension(et)  # False
        log.info(f"contains_c_extension(et): {et_result}")

        numpy_is_ext = is_c_extension(numpy)  # False
        log.info(f"is_c_extension(numpy): {numpy_is_ext}")
        numpy_result = contains_c_extension(numpy)  # True
        log.info(f"contains_c_extension(numpy): {numpy_result}")
        multiarray_result = contains_c_extension(numpy_multiarray)  # True
        log.info(f"contains_c_extension(numpy_multiarray): {multiarray_result}")

        arrow_result = contains_c_extension(arrow)  # False
        log.info(f"contains_c_extension(arrow): {arrow_result}")

        alembic_result = contains_c_extension(alembic)  # False
        log.info(f"contains_c_extension(alembic): {alembic_result}")
        alembic_external_result = contains_c_extension(
            alembic, include_external_imports=True)  # True
        log.info(f"contains_c_extension(alembic, include_external_imports=True): "
                 f"{alembic_external_result}")
        # ... this example shows that Alembic imports hashlib, which can import
        #     _hashlib, which is a C extension; however, that doesn't stop us (for
        #     example) installing Alembic on a machine with no C compiler

        # Django's settings are configured before Django is scanned.
        django.conf.settings.configure()
        django_result = contains_c_extension(django)  # False
        log.info(f"contains_c_extension(django): {django_result}")
    
    
    # Script entry point: when this file is executed directly (not imported),
    # enable INFO-level log output and run the demonstration in test().
    if __name__ == '__main__':
        logging.basicConfig(level=logging.INFO)  # be verbose: show INFO messages
        test()
    

提交回复
热议问题