Noasync¶

dffml.noasync.load(*args, **kwargs)[source]¶

Yields records from a source.

Yields all the records from the source. If record keys are given, only those records are yielded.

Parameters:

source (BaseSource) – Data source to use. See Sources for sources and options.
*args (str) – Keys of the records to be returned. If empty, all the records in a source will be returned.

Returns:

Record object

Return type:

iterator

Examples

>>> from dffml import *
>>> from dffml.noasync import *
>>>
>>> source = CSVSource(filename="load.csv", allowempty=True, readwrite=True)
>>>
>>> def main():
...     save(
...         source,
...         Record("1", data={"features": {"A": 0, "B": 1}}),
...         Record("2", data={"features": {"A": 3, "B": 4}}),
...     )
...
...     # All records in source
...     for record in load(source):
...         print(record.export())
...
...     # For specific records in a source
...     for record in load(source, "1"):
...         print(record.export())
...
...     # Lightweight source syntax
...     for record in load("load.csv", "2"):
...         print(record.export())
>>>
>>> main()
{'key': '1', 'features': {'A': 0, 'B': 1}, 'extra': {}}
{'key': '2', 'features': {'A': 3, 'B': 4}, 'extra': {}}
{'key': '1', 'features': {'A': 0, 'B': 1}, 'extra': {}}
{'key': '2', 'features': {'A': 3, 'B': 4}, 'extra': {}}

dffml.noasync.predict(*args, **kwargs)[source]¶

Make a prediction using a machine learning model.

The model must be trained before using it to make a prediction.

Parameters:

model (Model) – Machine Learning model to use. See Models for models and options.
*args (list) – Input data for prediction. Could be a dict, Record, filename, or one of the data Sources.
update (boolean, optional) – If True, prediction data within records will be written back to all sources given. Defaults to False.
keep_record (boolean, optional) – If True, the results will be kept as their Record objects instead of being converted to a (record.key, features, predictions) tuple. Defaults to False.

Returns:

Record objects or (record.key, features, predictions) tuple.

Return type:

iterator

Examples

>>> from dffml import *
>>> from dffml.noasync import *
>>>
>>> model = SLRModel(
...     features=Features(
...         Feature("Years", int, 1),
...     ),
...     predict=Feature("Salary", int, 1),
...     location="tempdir",
... )
>>>
>>> def main():
...     train(
...         model,
...         {"Years": 0, "Salary": 10},
...         {"Years": 1, "Salary": 20},
...         {"Years": 2, "Salary": 30},
...         {"Years": 3, "Salary": 40},
...     )
...     for i, features, prediction in predict(
...         model,
...         {"Years": 6},
...         {"Years": 7},
...     ):
...         features["Salary"] = round(prediction["Salary"]["value"])
...         print(features)
>>>
>>> main()
{'Years': 6, 'Salary': 70}
{'Years': 7, 'Salary': 80}

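dffml.noasync.run(*args, **kwargs)[source]¶

Run a DataFlow on the given inputs. As the example below shows, iterating over the result yields (ctx, results) pairs.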

>>> # Socket server derived from
>>> # https://docs.python.org/3/library/socketserver.html#asynchronous-mixins
>>> import socket
>>> import threading
>>> import socketserver
>>>
>>> from dffml.noasync import run
>>> from dffml import DataFlow, Input, op
>>>
>>> class ThreadedTCPRequestHandler(socketserver.BaseRequestHandler):
...     def handle(self):
...         data = str(self.request.recv(1024), "ascii")
...         response = bytes("{}".format(data), "ascii")
...         self.request.sendall(response)
>>>
>>> class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer):
...     pass
>>>
>>> @op
... def client(ip: str, port: int, message: str):
...     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
...         sock.connect((ip, port))
...         sock.sendall(bytes(message, "ascii"))
...         response = str(sock.recv(1024), "ascii")
...         print("Received: {}".format(response))
>>>
>>> dataflow = DataFlow.auto(client)
>>>
>>> messages = [
...     Input(value="Hello World!", definition=client.op.inputs["message"])
...     for _ in range(0, 2)
... ]
>>>
>>> def main():
...     # Port 0 means to select an arbitrary unused port
...     HOST, PORT = "localhost", 0
...
...     server = ThreadedTCPServer((HOST, PORT), ThreadedTCPRequestHandler)
...     with server:
...         ip, port = server.server_address
...
...         # Start a thread with the server -- that thread will then start one
...         # more thread for each request
...         server_thread = threading.Thread(target=server.serve_forever)
...         # Exit the server thread when the main thread terminates
...         server_thread.daemon = True
...         server_thread.start()
...         print("Server loop running in a thread")
...
...         inputs = messages + [
...             Input(value=ip, definition=client.op.inputs["ip"]),
...             Input(value=port, definition=client.op.inputs["port"]),
...         ]
...
...         try:
...             for ctx, results in run(dataflow, inputs):
...                 pass
...         finally:
...             server.shutdown()
>>>
>>> main()
Server loop running in a thread
Received: Hello World!
Received: Hello World!

dffml.noasync.save(*args, **kwargs)[source]¶

Update a source’s knowledge about given records.

For each record given, call update on the source. Effectively saving all the records to the source.

Parameters:

source (BaseSource) – Data source to use. See Sources for sources and options.
*args (list) – Records to be saved.

Examples

>>> import pathlib
>>> from dffml import *
>>> from dffml.noasync import *
>>>
>>> source = CSVSource(filename="save.csv", allowempty=True, readwrite=True)
>>>
>>> def main():
...     save(
...         source,
...         Record(
...             "myrecord",
...             data={
...                 "features": {"Years": 0, "Expertise": 1, "Trust": 0.1},
...                 "prediction": {"Salary": {"value": 10, "confidence": 1.0}},
...             }
...         )
...     )
...     print(pathlib.Path("save.csv").read_text().strip())
>>>
>>> main()
key,tag,Expertise,Trust,Years,prediction_Salary,confidence_Salary
myrecord,untagged,1,0.1,0,10,1.0

dffml.noasync.score(*args, **kwargs)[source]¶

Assess the accuracy of a machine learning model.

Provide records to the model to assess the percent accuracy of its prediction abilities. The model should be already instantiated and trained.

Parameters:

model (Model) – Machine Learning model to use. See Models for models and options.
*args (list) – Input data for assessing accuracy. Could be a dict, Record, filename, one of the data Sources, or a filename with the extension being one of the data sources.

Returns:

A decimal value representing the percent of the time the model made the correct prediction. For some models this has another meaning. Please see the documentation for the model you're using for further details.

Return type:

float

Examples

>>> from dffml import *
>>> from dffml.noasync import *
>>>
>>> model = SLRModel(
...     features=Features(
...         Feature("Years", int, 1),
...     ),
...     predict=Feature("Salary", int, 1),
...     location="tempdir",
... )
>>>
>>> def main():
...     train(
...         model,
...         {"Years": 0, "Salary": 10},
...         {"Years": 1, "Salary": 20},
...         {"Years": 2, "Salary": 30},
...         {"Years": 3, "Salary": 40},
...     )
...     print(
...         "Accuracy:",
...         score(
...             model,
...             MeanSquaredErrorAccuracy(),
...             Feature("Salary", int, 1),
...             {"Years": 4, "Salary": 50},
...             {"Years": 5, "Salary": 60},
...         ),
...     )
>>>
>>> main()
Accuracy: 0.0

dffml.noasync.train(*args, **kwargs)[source]¶

Train a machine learning model.

Provide records to the model to train it. The model should be already instantiated.

Parameters:

model (Model) – Machine Learning model to use. See Models for models and options.
*args (list) – Input data for training. Could be a dict, Record, filename, one of the data Sources, or a filename with the extension being one of the data sources.

Examples

>>> from dffml import *
>>> from dffml.noasync import train
>>>
>>> model = SLRModel(
...     features=Features(
...         Feature("Years", int, 1),
...     ),
...     predict=Feature("Salary", int, 1),
...     location="tempdir",
... )
>>>
>>> def main():
...     train(
...         model,
...         {"Years": 0, "Salary": 10},
...         {"Years": 1, "Salary": 20},
...         {"Years": 2, "Salary": 30},
...         {"Years": 3, "Salary": 40},
...     )
>>>
>>> main()