# SPDX-License-Identifier: MIT
# Copyright (c) 2019 Intel Corporation
"""
Information on the software to evaluate is stored in a Record instance.
"""
import os
from datetime import datetime
from typing import Optional, List, Dict, Any
from .util.data import merge, export
from .util.display import create_row
from .log import LOGGER
# Rebind the module-level LOGGER name to the "record" child of the logger
# imported from .log; everything below in this module logs through the child.
LOGGER = LOGGER.getChild("record")
class NoSuchFeature(KeyError):
    """
    Raised by ``Record.feature`` when the requested feature name is not
    present in the record's features.

    Subclasses ``KeyError``, so handlers written for ``KeyError`` also catch it.
    """

    pass  # pragma: no cov
class RecordPrediction(dict):
    """
    A single prediction, stored as a dict with ``"confidence"`` and
    ``"value"`` keys.

    An instance that still holds the constructor defaults (confidence ``0.0``
    and value ``None``) reports a length of zero and is therefore falsy; any
    other instance reports a length of two.
    """

    EXPORTED = ["value", "confidence"]

    def __init__(self, *, confidence: float = 0.0, value: Any = None) -> None:
        """
        Parameters
        ----------
        confidence : float
            Stored under the ``"confidence"`` key.
        value : Any
            Stored under the ``"value"`` key.
        """
        self["confidence"] = confidence
        self["value"] = value

    @property
    def confidence(self):
        """The entry stored under ``"confidence"``."""
        return self["confidence"]

    @property
    def value(self):
        """The entry stored under ``"value"``."""
        return self["value"]

    def dict(self):
        """
        Return this mapping itself, or an empty list when the prediction is
        unset (falsy).
        """
        if not self:
            return []
        return self

    def __len__(self):
        # Length doubles as the "is set" flag: 0 while both entries hold their
        # defaults, otherwise the number of entries (always two).
        if self["confidence"] == 0.0 and self["value"] is None:
            return 0
        return 2

    def __bool__(self):
        return bool(len(self))

    # Alias retained so code that calls __nonzero__ directly keeps working.
    __nonzero__ = __bool__
class RecordData(object):
    """
    Holds the key, features, predictions and last-updated time of a record.

    ``dict()`` produces the exported form: the attributes named in
    ``EXPORTED`` (empty dicts are left out) plus ``last_updated`` when it
    differs from the timestamp taken at construction.
    """

    # NOTE(review): the trailing "Z" is a literal character in this format,
    # while timestamps are taken with the naive datetime.now() -- confirm
    # whether UTC is intended.
    DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
    EXPORTED = ["key", "features", "prediction"]

    def __init__(
        self,
        *,
        key: Optional[str] = None,
        features: Optional[Dict[str, Any]] = None,
        prediction: Optional[Dict[str, Any]] = None,
        last_updated: Optional[datetime] = None,
    ) -> None:
        """
        Parameters
        ----------
        key : str, optional
            Record key, defaults to the empty string.
        features : dict, optional
            Feature name to value mapping, defaults to an empty dict. The
            dict is stored as given (not copied).
        prediction : dict, optional
            Target name to a mapping of ``RecordPrediction`` keyword arguments
            (``confidence`` / ``value``). Each entry is wrapped in a
            ``RecordPrediction``; the dict passed in is left untouched.
        last_updated : datetime or str, optional
            A ``datetime``, or a string in ``DATE_FORMAT`` which is parsed.
            Defaults to the construction-time timestamp.
        """
        # If the record is not evaluated or predicted then don't report out a new
        # value for last_updated
        self.last_updated_default = datetime.now()
        if key is None:
            key = ""
        if features is None:
            features = {}
        if prediction is None:
            prediction = {}
        if last_updated is None:
            last_updated = self.last_updated_default
        if isinstance(last_updated, str):
            last_updated = datetime.strptime(last_updated, self.DATE_FORMAT)
        self.key = key
        self.features = features
        # Wrap into a new dict rather than assigning back into the argument,
        # so the caller's mapping is not modified as a side effect.
        self.prediction = {
            target: RecordPrediction(**fields)
            for target, fields in prediction.items()
        }
        self.last_updated = last_updated

    def dict(self):
        """
        Return the exported representation as a plain dict.

        Attributes listed in ``EXPORTED`` are included unless they are an
        empty dict (or missing). ``last_updated`` is added, formatted with
        ``DATE_FORMAT``, only when it is not the construction-time default.
        """
        data = {}
        for name in self.EXPORTED:
            value = getattr(self, name, {})
            # Empty dicts (features / prediction with nothing in them) are
            # not exported; non-dict values are always exported.
            if isinstance(value, dict) and not value:
                continue
            data[name] = value
        # Do not report if there has been no change since instantiation to
        # a default time value
        if self.last_updated != self.last_updated_default:
            data["last_updated"] = self.last_updated.strftime(self.DATE_FORMAT)
        return data

    def __repr__(self):
        return str(self.dict())
class Record(object):
    """
    Manages feature independent information and actions for a record.

    The record's key, features and predictions live in ``self.data`` (an
    instance of ``RECORD_DATA``); free-form additional information lives in
    ``self.extra``.
    """

    RECORD_DATA = RecordData

    def __init__(
        self,
        key: str,
        *,
        data: Optional[Dict[str, Any]] = None,
        extra: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Parameters
        ----------
        key : str
            Key of the record, converted with ``str()``.
        data : dict, optional
            Keyword arguments for ``RECORD_DATA``. May also carry an
            ``"extra"`` entry, which is split off into ``self.extra``.
            The dict passed in is not modified.
        extra : dict, optional
            Extra information; its entries win over those in
            ``data["extra"]`` when both are given.
        """
        # Work on a shallow copy so that setting "key" and removing "extra"
        # below do not alter the caller's dict.
        data = {} if data is None else dict(data)
        if extra is None:
            extra = {}
        data["key"] = str(key)
        if "extra" in data:
            # Prefer extra from init arguments to extra stored in data
            extra = {**data.pop("extra"), **extra}
        self.data = self.RECORD_DATA(**data)
        self.extra = extra

    def dict(self):
        # TODO(p2) Remove dict method in favor of export
        return self.export()

    def export(self):
        """
        Return the record data plus ``extra``, passed through ``export``.
        """
        data = self.data.dict()
        data["extra"] = self.extra
        return export(data)

    def __repr__(self):
        return str(self.dict())

    def __str__(self):
        """
        Render the key, extra information, features and predictions as a
        text table three quarters of the terminal wide (70 columns when the
        terminal size cannot be determined).
        """
        try:
            width = int(3 * os.get_terminal_size().columns / 4)
        except OSError:
            # Raised when there is no terminal to query
            width = 70
        header = "\n\tKey:\t" + self.key
        divider = "+" + "-" * (width) + "+"
        if self.extra:
            header += "\n\t" + str(self.extra)
        feature_rows = [
            create_row(feature, export(results), width)
            for feature, results in self.features().items()
        ]
        if self.data.prediction:
            prediction_rows = (
                ["\n" + "Prediction".center(width).rstrip()]
                + [divider]
                + [
                    create_row(pred, conf_val, width)
                    for pred, conf_val in self.data.prediction.items()
                ]
            )
        else:
            prediction_rows = ["Prediction: Undetermined".rjust(width)]
        return "\n".join(
            [header]
            + ["Record Features".center(width).rstrip()]
            + [divider]
            + feature_rows
            + prediction_rows
        ).rstrip()

    def merge(self, record: "Record"):
        """
        Merge the data and extra information of ``record`` into this record.

        Parameters
        ----------
        record : Record
            The record whose data is merged in. ``self.data`` is rebuilt from
            the merged dict and ``self.extra`` is updated with
            ``record.extra``.
        """
        data = self.data.dict()
        merge(data, record.data.dict())
        self.data = self.RECORD_DATA(**data)
        self.extra.update(record.extra)  # type: ignore

    @property
    def key(self) -> str:
        """The record's key as a string."""
        return str(self.data.key)

    def evaluated(self, results: Dict[str, Any], overwrite=False):
        """
        Updates features with the result dict

        Parameters
        ----------
        results : dict
            The results that will be added to the features.
        overwrite : boolean
            If 'True', the function overwrites the current features with the results provided.
            If 'False', the function updates the current features with the results provided.

        Examples
        --------

        >>> from dffml import *
        >>>
        >>> example = Record("example", data=dict(features=dict(dead="beef")))
        >>> print(example.features())
        {'dead': 'beef'}
        >>> results = {"new": "feature"}
        >>> example.evaluated({"feed": "face"})
        >>> print(example.features())
        {'dead': 'beef', 'feed': 'face'}
        >>> example.evaluated(results, overwrite=True)
        >>> print(example.features())
        {'new': 'feature'}
        """
        if overwrite:
            self.data.features = results
        else:
            self.data.features.update(results)
        # Moving last_updated off its default makes it show up on export
        self.data.last_updated = datetime.now()
        LOGGER.info("Evaluated %s %r", self.data.key, self.data.features)

    def features(self, subset: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Returns all features for the record or the subset specified.

        Parameters
        ----------
        subset : list[str], optional
            The subset of features that will be returned. When omitted or
            empty, all features are returned.

        Returns
        -------
        dict
            The requested features. An empty dict is returned if any name in
            ``subset`` is missing from the features or has the value ``None``.

        Examples
        --------

        >>> from dffml import *
        >>>
        >>> example = Record("example", data=dict(features=dict(dead="beef")))
        >>>
        >>> print(example.features(["dead"]))
        {'dead': 'beef'}
        """
        if not subset:
            return self.data.features
        features = self.data.features
        # All-or-nothing: one missing or None feature empties the result
        if any(
            name not in features or features[name] is None for name in subset
        ):
            return {}
        return {name: features[name] for name in subset}

    def feature(self, name: str) -> Any:
        """
        Returns a feature of the record.

        Parameters
        ----------
        name : str
            The name of the feature that will be returned.

        Returns
        -------
        any
            feature.

        Raises
        ------
        NoSuchFeature
            If ``name`` is not among the record's features.

        Examples
        --------

        >>> from dffml import *
        >>>
        >>> example = Record("example", data=dict(features=dict(dead="beef")))
        >>> print(example.feature("dead"))
        beef
        """
        if name not in self.data.features:
            raise NoSuchFeature(name)
        return self.data.features[name]

    def predicted(self, target: str, value: Any, confidence: float):
        """
        Set the prediction for this record.

        Parameters
        ----------
        target : str
            The target you want to store the prediction at.
        value : Any
            The prediction.
        confidence : float
            The confidence in the prediction, converted with ``float()``.

        Examples
        --------

        >>> from dffml import *
        >>>
        >>> example = Record("example", data=dict(features=dict(dead="beef")))
        >>> example.predicted("target_name", "feed", 1.00)
        >>> print(example.prediction("target_name"))
        {'confidence': 1.0, 'value': 'feed'}
        """
        self.data.prediction[target] = RecordPrediction(
            value=value, confidence=float(confidence)
        )
        # Moving last_updated off its default makes it show up on export
        self.data.last_updated = datetime.now()

    def prediction(self, target: str) -> RecordPrediction:
        """
        Get the prediction for this record.

        Parameters
        ----------
        target : str
            The target whose prediction will be returned.

        Returns
        -------
        RecordPrediction
            The prediction of the target specified.

        Raises
        ------
        KeyError
            If no prediction has been stored for ``target``.

        Examples
        --------

        >>> from dffml import *
        >>>
        >>> example = Record("example", data=dict(features=dict(dead="beef")))
        >>> example.predicted("target_name", "feed", 1.00)
        >>> print(example.prediction("target_name"))
        {'confidence': 1.0, 'value': 'feed'}
        """
        return self.data.prediction[target]

    def predictions(
        self, subset: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Get the predictions for the subset of record.

        Parameters
        ----------
        subset : list[str], optional
            The targets that predictions are returned for. When omitted or
            empty, all predictions are returned.

        Returns
        -------
        dict
            The prediction of the specified subset. An empty dict is returned
            if any name in ``subset`` has no prediction or a ``None`` one.

        Examples
        --------

        >>> from dffml import *
        >>>
        >>> example = Record("example", data=dict(features=dict(dead="beef")))
        >>> example.predicted("target_name1", "feed", 1.00)
        >>> example.predicted("target_name2", "deed", 0.97)
        >>> print(example.predictions(["target_name1", "target_name2"]))
        {'target_name1': {'confidence': 1.0, 'value': 'feed'}, 'target_name2': {'confidence': 0.97, 'value': 'deed'}}
        """
        if not subset:
            return self.data.prediction
        predictions = self.data.prediction
        # All-or-nothing: one missing or None prediction empties the result
        if any(
            name not in predictions or predictions[name] is None
            for name in subset
        ):
            return {}
        return {name: predictions[name] for name in subset}