Generating Model Card with PyTorch
This notebook provides an example of generating a model card for a PyTorch model using the Intel Model Card Generator.
Data Collection and Preprocessing from Adult Dataset
[ ]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import relu
import os
from sklearn.datasets import fetch_openml
from intel_ai_safety.model_card_gen.model_card_gen import ModelCardGen
from intel_ai_safety.model_card_gen.datasets import PytorchDataset
from torch.utils.data import Dataset
1. Data Collection and Preprocessing
[ ]:
# Columns holding categorical values; these are one-hot encoded during preprocessing.
CATEGORICAL_FEATURE_KEYS = [
    'workclass', 'marital-status', 'occupation', 'relationship',
    'race', 'sex', 'native-country',
]

# Columns holding numeric values.
NUMERIC_FEATURE_KEYS = [
    'age', 'capital-gain', 'capital-loss', 'hours-per-week', 'education-num',
]

# Columns removed from the raw frame before encoding.
DROP_COLUMNS = ['fnlwgt', 'education']

# Name of the target column.
LABEL_KEY = 'label'
Fetch Data from OpenML
[ ]:
# Download the Adult dataset (OpenML id 1590) as pandas objects.
data = fetch_openml(data_id=1590, as_frame=True)

# Copy before attaching the target so the frame held by the fetched bunch is
# not mutated in place (previously raw_data aliased data.data).
raw_data = data.data.copy()
raw_data[LABEL_KEY] = data.target

# raw_data stays as the untouched reference; preprocessing works on adult_data.
adult_data = raw_data.copy()
Drop Unneeded Columns, One-Hot Encode Categorical Features, and Encode Labels
[ ]:
# Start from raw_data instead of re-assigning adult_data onto itself, so this
# cell gives the same result however many times it is executed (the previous
# form raised a KeyError on a second run because the columns were already gone).
adult_data = raw_data.drop(columns=DROP_COLUMNS)

# One-hot encode every categorical column.
adult_data = pd.get_dummies(adult_data, columns=CATEGORICAL_FEATURE_KEYS)

# Encode the two income classes as integer class indices.
adult_data[LABEL_KEY] = adult_data[LABEL_KEY].map({'<=50K': 0, '>50K': 1})
Separate Features and Labels
[ ]:
# Pull the target column out as a numpy array, then keep only the feature
# columns in adult_data and remember their names.
labels = adult_data['label'].to_numpy()
adult_data = adult_data.drop(columns=['label'])
feature_names = adult_data.columns.tolist()
[ ]:
class AdultDataset(Dataset):
    """Map-style torch dataset over a preprocessed feature frame and its labels.

    Args:
        df: DataFrame of numeric and/or boolean feature columns, one row per sample.
        labels: numpy array of labels aligned row-for-row with ``df``.
        transform: optional callable applied to a feature sample on access.
    """

    def __init__(self, df, labels, transform=None):
        self.data = self.make_input_tensor(df)
        self.labels = self.make_label_tensor(labels)
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        # Fixed: this used to read ``self.adult_df``, an attribute that is never set.
        return len(self.data)

    def make_input_tensor(self, df):
        """Convert the feature frame to a float32 tensor."""
        # Ask pandas for float32 directly: a frame mixing bool dummy columns with
        # numeric columns otherwise comes back as an object array, which
        # torch.from_numpy cannot convert.
        return torch.from_numpy(df.to_numpy(dtype=np.float32))

    def make_label_tensor(self, label_array):
        """Convert the numpy label array to a tensor (dtype follows the array)."""
        return torch.from_numpy(label_array)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` for an int, slice, list or tensor index."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        sample = self.data[idx]
        label = self.labels[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample, label
[ ]:
# Wrap the feature frame and the label array in the torch Dataset defined above.
adult_dataset = AdultDataset(adult_data, labels)
2. Build Model
[ ]:
class AdultNN(nn.Module):
    """Fully connected classifier: num_features -> 150 -> 150 -> num_classes.

    NOTE(review): a later cell defines another class with this same name, and
    that later definition is the one in effect when the model is instantiated.
    The layers ``lin2``, ``lin3``, ``lin5``, ``lin6`` and ``prelu`` are created
    here but never called in ``forward``.
    """

    def __init__(self, num_features, num_classes):
        super().__init__()
        self.num_features = num_features
        self.num_classes = num_classes

        # Layers are registered in this exact order so parameter names and
        # initialisation order stay as they were.
        self.lin1 = nn.Linear(num_features, 150)
        self.lin2 = nn.Linear(50, 50)
        self.lin3 = nn.Linear(50, 50)
        self.lin4 = nn.Linear(150, 150)
        self.lin5 = nn.Linear(50, 50)
        self.lin6 = nn.Linear(50, 50)
        self.lin10 = nn.Linear(150, num_classes)
        self.prelu = nn.PReLU()
        self.dropout = nn.Dropout(0.25)

    def forward(self, xin):
        """Run two ReLU hidden layers, dropout, then a ReLU-activated output layer."""
        hidden = relu(self.lin4(relu(self.lin1(xin))))
        return relu(self.lin10(self.dropout(hidden)))
[ ]:
torch.manual_seed(1)  # Set seed for reproducibility.


class AdultNN(nn.Module):
    """Three-layer sigmoid network that outputs class probabilities.

    This definition replaces the class of the same name from the previous cell.

    Args:
        feature_size: number of input features per sample.
        num_labels: number of output classes.
    """

    def __init__(self, feature_size, num_labels):
        super().__init__()
        self.linear1 = nn.Linear(feature_size, feature_size)
        self.sigmoid1 = nn.Sigmoid()
        self.linear2 = nn.Linear(feature_size, 8)
        self.sigmoid2 = nn.Sigmoid()
        # Fixed: the output width now follows num_labels; it used to be
        # hard-coded to 2, silently ignoring the constructor argument.
        self.linear3 = nn.Linear(8, num_labels)
        # Softmax over the class dimension, so forward returns probabilities.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return a (batch, num_labels) tensor of class probabilities."""
        lin1_out = self.linear1(x)
        sigmoid_out1 = self.sigmoid1(lin1_out)
        sigmoid_out2 = self.sigmoid2(self.linear2(sigmoid_out1))
        return self.softmax(self.linear3(sigmoid_out2))
3. Train Model
[ ]:
net = AdultNN(len(feature_names), 2)

# The network's forward pass already ends in a softmax, so it returns class
# probabilities. nn.CrossEntropyLoss expects unnormalised logits and would apply
# log-softmax a second time, flattening the gradients. Use the negative
# log-likelihood of the log-probabilities instead, which is the cross-entropy.
criterion = nn.NLLLoss()
num_epochs = 500
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

# Full-batch training: every epoch uses the whole dataset at once.
input_tensor, label_tensor = adult_dataset[:]

for epoch in range(num_epochs):
    output = net(input_tensor)
    # Clamp away exact zeros so the log stays finite.
    loss = criterion(torch.log(output.clamp_min(1e-12)), label_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0:
        print('Epoch {}/{} => Loss: {:.2f}'.format(epoch+1, num_epochs, loss.item()))
4. Save Model
Save offline version of our module
[ ]:
# Compile the trained network to TorchScript, then serialise it to disk.
scripted_net = torch.jit.script(net)
torch.jit.save(scripted_net, 'adult_model.pt')
5. Generate Model Card
EvalConfig Input
[ ]:
# Path of the evaluation config file: the next cell writes it with %%writefile,
# and it is later passed to ModelCardGen.generate as eval_config.
_eval_config = 'eval_config.proto'
[ ]:
%%writefile {_eval_config}
model_specs {
label_key: 'label'
prediction_key: 'prediction'
}
metrics_specs {
metrics {class_name: "BinaryAccuracy"}
metrics {class_name: "AUC"}
metrics {class_name: "ConfusionMatrixPlot"}
# metrics {class_name: "ConfusionMatrixAtThresholds"}
metrics {
class_name: "FairnessIndicators"
# config: '{"thresholds": [0.25, 0.5, 0.75]}'
}
}
slicing_specs {}
slicing_specs {
feature_keys: 'sex_Female'
# feature_keys: 'sex_Male'
}
options {
include_default_metrics { value: false }
}
[ ]:
# Hand-written model card metadata, passed to ModelCardGen.generate as model_card.
mc = {
    "schema_version": "0.0.1",
    "model_details": {
        "name": "Adult Multilayer Neural Network",
        "version": {
            "name": "0.1",
            "date": "2022-08-01"
        },
        "graphics": {},
        "citations": [
            {
                "citation": 'Simoudis, Evangelos, Jiawei Han, and Usama Fayyad. Proceedings of the second international conference on knowledge discovery & data mining. No. CONF-960830-. AAAI Press, Menlo Park, CA (United States), 1996.'
            },
            {
                "citation": 'Friedler, Sorelle A., et al. "A Comparative Study of Fairness-Enhancing Interventions in Machine Learning." Proceedings of the Conference on Fairness, Accountability, and Transparency, 2019, https://doi.org/10.1145/3287560.3287589.'
            },
            {
                "citation": 'Lahoti, Preethi, et al. "Fairness without demographics through adversarially reweighted learning." Advances in neural information processing systems 33 (2020): 728-740.'
            }
        ],
        # Fixed wording: the sentence was missing "on the" before the dataset name.
        "overview": 'This example model card is for a multilayer network trained on the "Adult" dataset from the UCI repository with the learning task of predicting whether a person has a salary greater or less than $50,000.',
    }
}
[ ]:
# Reuse the AdultDataset built earlier (the same one the model was trained on)
# instead of constructing a second, identical copy of the tensors.
train_dataset = PytorchDataset(adult_dataset, feature_names=adult_data.columns)
[ ]:
# Build the model card from the saved model file, the evaluation config and
# the hand-written card metadata, using the wrapped dataset under the 'train' key.
mcg = ModelCardGen.generate(
    data_sets={'train': train_dataset},
    model_path='adult_model.pt',
    eval_config=_eval_config,
    model_card=mc,
)
[ ]:
# Write the generated model card out as an HTML file.
mcg.export_html('census_mc.html')