# SPDX-License-Identifier: MIT
# Copyright (c) 2019 Intel Corporation
"""
Command line interface evaluates packages given their source URLs
"""
import pathlib
import pdb
import sys
import traceback
import contextlib
import subprocess
import pkg_resources
import importlib.util
from typing import Union

from .log import LOGGER
from ..version import VERSION
from ..record import Record
from ..feature.feature import Features
from ..df.types import DataFlow
from ..plugins import PACKAGE_NAMES_BY_PLUGIN, PACKAGE_NAMES_TO_DIRECTORY
from ..source.dfpreprocess import (
    DataFlowPreprocessSource,
    DataFlowPreprocessSourceConfig,
)
from ..source.source import Sources, BaseSource, SubsetSources
from ..configloader.configloader import BaseConfigLoader
from ..util.cli.cmd import CMD
from ..util.cli.cmds import (
    SourcesCMD,
    PortCMD,
    KeysCMD,
    KeysCMDConfig,
    PortCMDConfig,
    SourcesCMDConfig,
)
from ..util.config.fields import FIELD_SOURCES
from ..base import field, config

from .dataflow import Dataflow
from .config import Config
from .ml import Train, Accuracy, Predict
from .list import List

version = VERSION


@config
class VersionConfig:
    no_errors: bool = field(
        "Set to ignore errors when loading modules", default=False
    )


class Version(CMD):
    """
    Print version and installed dffml packages
    """

    CONFIG = VersionConfig

    @staticmethod
    async def git_hash(path: Union[pathlib.Path, str]):
        """
        If the path is a git repo, return whether the working tree is dirty
        and the short hash of HEAD. Both are None if the git commands fail.

        Examples
        --------

        >>> import pathlib
        >>> import asyncio
        >>> import subprocess
        >>>
        >>> import dffml.cli.cli
        >>>
        >>> subprocess.check_call(["git", "init"])
        0
        >>> subprocess.check_call(["git", "config", "user.name", "First Last"])
        0
        >>> subprocess.check_call(["git", "config", "user.email", "first.last@example.com"])
        0
        >>> pathlib.Path("README.md").write_text("Contents")
        8
        >>> subprocess.check_call(["git", "add", "README.md"])
        0
        >>> subprocess.check_call(["git", "commit", "-m", "First commit"])
        0
        >>> dirty, short_hash = asyncio.run(dffml.cli.cli.Version.git_hash("."))
        >>> dirty
        False
        >>> int(short_hash, 16) > 0
        True
        """
        path = pathlib.Path(path).resolve()
        dirty = None
        short_hash = None
        with contextlib.suppress(subprocess.CalledProcessError):
            dirty = bool(
                subprocess.call(
                    ["git", "diff-index", "--quiet", "HEAD", "--"],
                    cwd=str(path),
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            )
            short_hash = (
                subprocess.check_output(
                    ["git", "show", "-s", "--pretty=%h %D", "HEAD"],
                    cwd=str(path),
                    stderr=subprocess.DEVNULL,
                )
                .decode()
                .split()
            )[0]
        return dirty, short_hash

    async def run(self):
        self.logger.debug("Reporting version")
        # Versions of plugins
        for package_name in ["dffml"] + PACKAGE_NAMES_BY_PLUGIN["all"]:
            version = "not installed"
            path = ""
            import_package_name = package_name.replace("-", "_")
            import_package_name_version = import_package_name + ".version"
            for module_name in [
                import_package_name,
                import_package_name_version,
            ]:
                with contextlib.redirect_stderr(
                    None
                ), contextlib.redirect_stdout(None):
                    try:
                        module = importlib.import_module(module_name)
                    except ModuleNotFoundError:
                        continue
                    except Exception:
                        if self.no_errors:
                            self.logger.error(
                                f"Failed to import {module_name}: {traceback.format_exc().rstrip()}"
                            )
                            version = "ERROR"
                            continue
                        else:
                            raise
                    sys.modules[module_name] = module
                if module_name in sys.modules:
                    module = sys.modules[module_name]
                    if module_name.endswith(".version"):
                        version = module.VERSION
                    else:
                        path = module.__path__[0]
            # Report if code comes from git repo
            dirty, short_hash = await self.git_hash(path)
            package_details = [package_name, version]
            if path:
                package_details.append(path)
            if dirty is not None and short_hash is not None:
                package_details.append(short_hash)
                if dirty:
                    package_details.append("(dirty git repo)")
            print(" ".join(package_details))


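# An illustrative sketch of the lines ``Version.run`` prints (values here are
# hypothetical, not real output): "<package> <version>", then the module path,
# the short git hash, and a dirty marker when the checkout has uncommitted
# changes, e.g.
#
#   dffml 0.4.0 /src/dffml/dffml abc1234 (dirty git repo)
#   dffml-model-scikit not installed

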
class Packages(CMD):
    async def run(self):
        print(
            "\n".join(
                sorted(["dffml"] + list(PACKAGE_NAMES_TO_DIRECTORY.keys()))
            )
        )


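# Both commands above are exposed as subcommands of the ``dffml`` console
# script, so typical shell invocations look like:
#
#   $ dffml version
#   $ dffml packages

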
@config
class EditCMDConfig:
    dataflow: str = field(
        "File containing exported DataFlow", default=None,
    )
    config: BaseConfigLoader = field(
        "ConfigLoader to use for importing DataFlow", default=None,
    )
    features: Features = field(
        "Feature definitions of records to update",
        required=False,
        default_factory=lambda: [],
    )
    sources: Sources = FIELD_SOURCES


class BaseEditCMD(SourcesCMD):
    CONFIG = EditCMDConfig

    async def __aenter__(self):
        await super().__aenter__()
        if self.dataflow:
            dataflow_path = pathlib.Path(self.dataflow)
            config_cls = self.config
            if config_cls is None:
                config_type = dataflow_path.suffix.replace(".", "")
                config_cls = BaseConfigLoader.load(config_type)
            async with config_cls.withconfig(
                self.extra_config
            ) as configloader:
                async with configloader() as loader:
                    exported = await loader.loadb(dataflow_path.read_bytes())
                    self.dataflow = DataFlow._fromdict(**exported)
            self.sources = DataFlowPreprocessSource(
                DataFlowPreprocessSourceConfig(
                    source=self.sources,
                    dataflow=self.dataflow,
                    features=self.features,
                )
            )


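# A minimal sketch of the suffix-based loader selection above, for a subclass
# such as EditAllRecords below (hypothetical path df/edit.json; assumes CMD
# accepts its config fields as keyword arguments):
#
#   cmd = EditAllRecords(dataflow="df/edit.json", sources=..., features=...)
#   # ".json" -> config_type "json" -> BaseConfigLoader.load("json"), so the
#   # JSON configloader plugin deserializes the exported DataFlow.

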
class EditAllRecords(BaseEditCMD, SourcesCMD):
    """
    Edit all records using operations
    """

    async def run(self):
        async with self.sources as src:
            async with src() as sctx:
                async for record in sctx.records():
                    if not self.dataflow:
                        pdb.set_trace()
                    await sctx.update(record)


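# When no dataflow is supplied, ``run`` above drops into pdb once per record
# so it can be edited by hand before being written back. A hedged sketch of a
# session (``record`` is the loop variable; ``evaluated`` is assumed to store
# feature results on the record, per the usual Record API):
#
#   (Pdb) record.evaluated({"feature_name": 42})
#   (Pdb) continue

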
@config
class EditRecordConfig(EditCMDConfig, KeysCMDConfig):
    pass


class EditRecord(EditAllRecords, KeysCMD):
    """
    Edit each specified record
    """

    CONFIG = EditRecordConfig

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.sources = SubsetSources(*self.sources, keys=self.keys)


class Edit(CMD):
    """
    Edit records
    """

    _all = EditAllRecords
    record = EditRecord


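# Subcommand names come from the class attributes above; dffml strips the
# leading underscore used to avoid shadowing builtins. Assuming the usual
# CLI flag spelling and a hypothetical records.json, these map to:
#
#   $ dffml edit all -sources f=json -source-filename records.json
#   $ dffml edit record -keys somekey -sources f=json -source-filename records.json

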
@config
class MergeConfig:
    src: BaseSource = field(
        "Source to pull records from", labeled=True,
    )
    dest: BaseSource = field(
        "Source to merge records into", labeled=True,
    )


class Merge(CMD):
    """
    Merge record data between sources
    """

    CONFIG = MergeConfig

    async def run(self):
        async with self.src.withconfig(
            self.extra_config
        ) as src, self.dest.withconfig(self.extra_config) as dest:
            async with src() as sctx, dest() as dctx:
                # Loop variable renamed from src to record to avoid shadowing
                # the src source opened above
                async for record in sctx.records():
                    merged = Record(record.key)
                    merged.merge(record)
                    merged.merge(await dctx.record(merged.key))
                    await dctx.update(merged)


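# A minimal sketch of the merge order implied above (hypothetical Records;
# assuming Record.merge overlays the other record's data on top of what is
# already present):
#
#   merged = Record("some-key")
#   merged.merge(record_from_src)   # data pulled from src
#   merged.merge(record_from_dest)  # applied last, so dest wins on conflict
#   # merged is then written back to dest via dctx.update(merged)

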
@config
class ImportExportCMDConfig(PortCMDConfig, SourcesCMDConfig):
    filename: str = field(
        "Filename", default=None,
    )


class ImportExportCMD(PortCMD, SourcesCMD):
    """Shared import export arguments"""

    CONFIG = ImportExportCMDConfig


class Import(ImportExportCMD):
    """Imports records"""

    async def run(self):
        async with self.sources as sources:
            async with sources() as sctx:
                return await self.port.import_from_file(sctx, self.filename)


class Export(ImportExportCMD):
    """Exports records"""

    async def run(self):
        async with self.sources as sources:
            async with sources() as sctx:
                return await self.port.export_to_file(sctx, self.filename)


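# Hypothetical shell usage of the two commands above (flag names are derived
# from the config fields; a json port plugin and a csv source are assumed to
# be installed):
#
#   $ dffml export -port json -sources f=csv -source-filename data.csv -filename records.json
#   $ dffml import -port json -sources f=csv -source-filename data.csv -filename records.json

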
SERVICES_LOGGER = LOGGER.getChild("services")


def failed_to_load_service(loading_what: str = "services"):
    """
    Sometimes weird dependency issues show up and prevent us from loading
    anything. We log the traceback in that case.
    """
    SERVICES_LOGGER.error(
        "Error while loading %s: %s", loading_what, traceback.format_exc()
    )


def services():
    """
    Loads the dffml.service.cli entrypoint and creates a CMD class
    incorporating all of the loaded CLI versions of services as subcommands.
    """

    class Service(CMD):
        """
        Expose various functionalities of dffml
        """

    try:
        for i in pkg_resources.iter_entry_points("dffml.service.cli"):
            try:
                loaded = i.load()
            except Exception:
                failed_to_load_service(repr(i))
                continue
            if issubclass(loaded, CMD):
                setattr(Service, i.name, loaded)
    except Exception:
        failed_to_load_service()
    return Service


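# A sketch of how a third party package would register a CMD subclass on the
# entrypoint iterated above (hypothetical package, module, and command
# names), in its setup.py:
#
#   entry_points={
#       "dffml.service.cli": ["mycmd = mypackage.cli:MyCMD"],
#   }
#
# services() would then expose it as ``dffml service mycmd``.

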
class CLI(CMD):
    """
    CLI interface for dffml
    """

    version = Version
    packages = Packages
    _list = List
    edit = Edit
    merge = Merge
    _import = Import
    export = Export
    train = Train
    accuracy = Accuracy
    predict = Predict
    service = services()
    dataflow = Dataflow
    config = Config


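# The ``dffml`` console script points at this class; a hedged sketch of the
# equivalent setup.py wiring (the exact target may differ between releases):
#
#   entry_points={
#       "console_scripts": ["dffml = dffml.cli.cli:CLI.main"],
#   }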