# metaflow/__init__.py
"""
Welcome to Metaflow!
Metaflow is a microframework for data science projects.
There are two main use cases for this package:
1) You can define new flows using the `FlowSpec`
class and related decorators.
2) You can access and inspect existing flows.
You can instantiate a `Metaflow` class to
get an entry point to all existing objects.
# How to work with flows
A flow is a directed graph of Python functions called steps.
Metaflow takes care of executing these steps one by one in various
environments, such as on a local laptop or compute environments
(such as AWS Batch for example). It snapshots
data and code related to each run, so you can resume, reproduce,
and inspect results easily at a later point in time.
Here is a high-level overview of objects related to flows:
[ FlowSpec ] (0) Base class for flows.
[ MyFlow ] (1) Subclass from FlowSpec to define a new flow.
define new flows
----------------- (2) Run MyFlow on the command line.
access results
[ Flow ] (3) Access your flow with `Flow('MyFlow')`.
[ Run ] (4) Access a specific run with `Run('MyFlow/RunID')`.
[ Step ] (5) Access a specific step by its name, e.g. `run['end']`.
[ Task ] (6) Access a task related to the step with `step.task`.
[ DataArtifact ] (7) Access data of a task with `task.data`.
# More questions?
If you have any questions, feel free to post a bug report/question on the
Metaflow GitHub page.
"""
import os
import sys
from metaflow.extension_support import (
alias_submodules,
get_modules,
lazy_load_aliases,
load_globals,
load_module,
EXT_PKG,
_ext_debug,
)
# We load the module overrides *first* explicitly. Non overrides can be loaded
# in toplevel as well but these can be loaded first if needed. Note that those
# modules should be careful not to include anything in Metaflow at their top-level
# as it is likely to not work.
#
# NOTE: the temporary names used below (_override_modules, _tl_modules, m,
# override_module, tl_module, ...) are deleted *by string name* in the cleanup
# loop at the bottom of this file -- renaming any of them would leak the new
# name into the public metaflow namespace.
_override_modules = []  # fully-qualified names of override modules to load
_tl_modules = []  # (package_name, fully-qualified top-level module name) pairs
try:
    _modules_to_import = get_modules("toplevel")
    for m in _modules_to_import:
        # An extension advertises an override module through the
        # `module_overrides` attribute of its toplevel module.
        override_module = m.module.__dict__.get("module_overrides", None)
        if override_module is not None:
            _override_modules.append(
                ".".join([EXT_PKG, m.tl_package, "toplevel", override_module])
            )
        # A `toplevel` attribute names a module whose globals get merged into
        # metaflow's namespace near the end of this file.
        tl_module = m.module.__dict__.get("toplevel", None)
        if tl_module is not None:
            _tl_modules.append(
                (
                    m.package_name,
                    ".".join([EXT_PKG, m.tl_package, "toplevel", tl_module]),
                )
            )
    _ext_debug("Got overrides to load: %s" % _override_modules)
    _ext_debug("Got top-level imports: %s" % str(_tl_modules))
except Exception as e:
    # Extension discovery is best-effort: a broken extension must not prevent
    # metaflow itself from importing.
    _ext_debug("Error in importing toplevel/overrides: %s" % e)
# Load overrides now that we have them (in the proper order)
for m in _override_modules:
    extension_module = load_module(m)
    if extension_module:
        # We load only modules
        # `m` has the shape "<EXT_PKG>.<tl_package>.toplevel.<module>" (built
        # above), so component [1] is the extension's top-level package name.
        tl_package = m.split(".")[1]
        lazy_load_aliases(alias_submodules(extension_module, tl_package, None))
# Utilities
from .multicore_utils import parallel_imap_unordered, parallel_map
from .metaflow_profile import profile

# current runtime singleton
from .metaflow_current import current

# Flow spec
from .flowspec import FlowSpec
from .parameters import Parameter, JSONTypeClass, JSONType
from .user_configs.config_parameters import Config, ConfigValue, config_expr
from .user_configs.config_decorators import CustomFlowDecorator, CustomStepDecorator

# data layer
# For historical reasons, we make metaflow.plugins.datatools accessible as
# metaflow.datatools. S3 is also a tool that has historically been available at the
# top-level so keep as is.
lazy_load_aliases({"metaflow.datatools": "metaflow.plugins.datatools"})
from .plugins.datatools import S3

# includefile
from .includefile import IncludeFile

# Decorators
from .decorators import step, _import_plugin_decorators

# Parsers (for configs) for now
from .plugins import _import_tl_plugins

# Injects top-level plugin symbols (e.g. config parsers) into this namespace.
_import_tl_plugins(globals())

# this auto-generates decorator functions from Decorator objects
# in the top-level metaflow namespace
_import_plugin_decorators(globals())
# Cards are available on Python 3.4 and up. Use a tuple comparison on
# sys.version_info (same idiom as the Runner guard below): the previous check
# (`version_info[0] >= 3 and version_info[1] >= 4`) would wrongly exclude
# versions such as a hypothetical 4.0-4.3 whose minor component is < 4.
if sys.version_info >= (3, 4):
    from . import cards
# Client
from .client import (
    namespace,
    get_namespace,
    default_namespace,
    metadata,
    get_metadata,
    default_metadata,
    Metaflow,
    Flow,
    Run,
    Step,
    Task,
    DataArtifact,
)

# Import tuple_util only for its side effects (it defines its data class at
# import time); the module is deliberately not re-exported as a new symbol.
from . import tuple_util
# Runner API -- these modules are only importable on Python 3.7+.
if sys.version_info >= (3, 7):
    from .runner.metaflow_runner import Runner
    from .runner.nbrun import NBRunner
    from .runner.deployer import Deployer
    from .runner.deployer import DeployedFlow
    from .runner.nbdeploy import NBDeployer
# Record of (package_name, module) for every loaded extension top-level module.
# Dunder-named on purpose: it is intentionally *not* in the cleanup list below,
# so it remains accessible after import.
__ext_tl_modules__ = []
_ext_debug("Loading top-level modules")
for pkg_name, m in _tl_modules:
    extension_module = load_module(m)
    if extension_module:
        # Component [1] of "<EXT_PKG>.<tl_package>.toplevel.<module>".
        tl_package = m.split(".")[1]
        # Merge the extension's globals into metaflow's own namespace...
        load_globals(extension_module, globals(), extra_indent=True)
        # ...and alias its submodules under `metaflow.` lazily.
        lazy_load_aliases(
            alias_submodules(extension_module, tl_package, None, extra_indent=True)
        )
        __ext_tl_modules__.append((pkg_name, extension_module))
# Erase all temporary names to avoid leaking things.
# Every entry must be the *string* name of the global to delete. The previous
# code listed the bare variable EXT_PKG here, which deleted
# globals()[<value of EXT_PKG>] (a key that never exists, silently swallowed
# by the KeyError handler) and left EXT_PKG itself in the namespace.
for _n in [
    "_ext_debug",
    "alias_submodules",
    "get_modules",
    "lazy_load_aliases",
    "load_globals",
    "load_module",
    "EXT_PKG",  # was the bare variable EXT_PKG, which deleted the wrong key
    "_override_modules",
    "_tl_modules",
    "_modules_to_import",
    "m",
    "pkg_name",  # loop variable of the top-level module loop above
    "override_module",
    "tl_module",
    "extension_module",
    "tl_package",
    "version_info",  # vestigial: this file does `import sys`, so this global never exists
]:
    try:
        del globals()[_n]
    except KeyError:
        # Not every temporary exists on every code path (e.g. when no
        # extensions are installed), so missing names are fine.
        pass
del globals()["_n"]
# Expose the package version. Bind the imported name directly to __version__
# so no temporary alias (previously `_mf_version`) is left behind -- this file
# goes out of its way above to erase all temporary names.
from .version import metaflow_version as __version__