XGBoost Inference on the Adult Dataset Using FHE¶

Expected RAM usage: 25 GB. Expected runtime: 3-6 minutes.

Introduction¶

This example demonstrates how to perform inference over encrypted data with an XGBoost model. We work with the UCI adult dataset [1-2], where the task is to predict whether a person's income exceeds 50K a year. First, a plain XGBoost model is trained on the adult dataset in the clear. Then, the trained XGBoost model is encrypted and used to run prediction over an encrypted batch of samples from the adult dataset.

Step 1. Training a plain XGBoost model¶

We train an XGBoost model with the adult dataset, in plaintext.

1.1 Decide whether this demo will be run on GPU¶

Running on GPU is only possible if the machine has a GPU and helayers was compiled with GPU support. If these conditions are satisfied, changing the flag below to True will make the demo run on GPU.

In [1]:
run_with_gpu = False

1.2. We start with some imports:¶

In [2]:
import numpy as np
import math
import os
import pyhelayers
import shutil
from sklearn import datasets
from sklearn.model_selection import train_test_split
from utils import get_used_ram, get_data_sets_dir
from xgboost import XGBClassifier
import pandas as pd

1.3. Load the adult dataset¶

In [3]:
def preprocess(X, y):
    X['marital-status'] = X['marital-status'].str.strip()
    X['marital-status'] = X['marital-status'].replace(['Married-civ-spouse','Married-spouse-absent','Married-AF-spouse'], 'Married')
    X['marital-status'] = X['marital-status'].replace(['Never-married','Divorced','Separated','Widowed'], 'Single')
    X['marital-status'] = X['marital-status'].map({'Married':0, 'Single':1})
    X['marital-status'] = X['marital-status'].astype('int')
    X = X[['age', 'education-num', 'marital-status', 'hours-per-week', 'capital-loss', 'capital-gain']]
    y=y.str.strip().map({'<=50K': 0, '>50K': 1}).astype('int')
    return (X, y)
    
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
                'marital-status', 'occupation', 'relationship', 'race', 'sex',
                'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'label']
INPUT_DIR = os.path.join(get_data_sets_dir(), 'uci_adult')

train_data = pd.read_csv(os.path.join(INPUT_DIR, "adult.data"), names=column_names, header=None,
                         index_col=False, engine='python')
X_train = train_data.iloc[:,:-1]
y_train = train_data.iloc[:,-1]
X_train, y_train = preprocess(X_train, y_train)

test_data = pd.read_csv(os.path.join(INPUT_DIR, "adult.test"), names=column_names, header=None,
                        index_col=False, skiprows=1, sep="[,.]", engine='python')
X_test = test_data.iloc[:,:-1]
y_test = test_data.iloc[:,-1]
X_test, y_test = preprocess(X_test, y_test)
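
As an optional sanity check, the shapes and label balance of the preprocessed data can be inspected. A minimal illustrative sketch, using only the data frames loaded above:

# Optional sanity check on the preprocessed data (illustrative only)
print('training samples:', X_train.shape, 'test samples:', X_test.shape)
print('fraction of positive labels in the training set:', y_train.mean())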

1.4. Train the XGBoost model¶

We use the XGBoost Python library to train an XGBoost model on the adult dataset, and save the resulting model to a JSON file. This JSON file will later be used to initialize the encrypted XGBoost model.

In [4]:
clf = XGBClassifier(eta=0.2, gamma=3.6, max_depth=3,
                    min_child_weight=3, subsample=0.8, objective="binary:logistic",
                    scale_pos_weight=14.978045943588253, eval_metric="aucpr", n_estimators=10)
clf.fit(X_train, y_train)
model_dir = os.path.join('data', 'adult_xgboost')
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, 'xgb.json')
clf.save_model(model_path)
print('plain XGBoost model saved')
plain XGBoost model saved
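
As a side note on the hyper-parameters above: scale_pos_weight is commonly chosen based on the class imbalance of the training labels. A minimal sketch of computing the negative-to-positive ratio for reference (the value used above may instead come from separate hyper-parameter tuning):

# Ratio of negative to positive training labels; a common starting point for
# scale_pos_weight (the value used in the cell above may have been tuned separately)
neg_pos_ratio = float((y_train == 0).sum()) / float((y_train == 1).sum())
print('negative/positive ratio in the training set:', neg_pos_ratio)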

1.5 Evaluate the XGBoost model¶

We compute the F1 score for the XGBoost model trained above. This score will later be compared with the F1 score of the prediction over encrypted data.

In [22]:
from sklearn.metrics import f1_score
plain_xgb_preds = clf.predict(X_test)
f1_plain = f1_score(y_test, plain_xgb_preds)
print('plain XGBoost f1 score =', f1_plain)
plain XGBoost f1 score = 0.5488528915927582

Step 2. FHE inference¶

In this step, we will encrypt the above trained XGBoost model and the test samples from the adult dataset. The encrypted XGBoost model will be used to run prediction over the encrypted adult samples.

2.1 Compute the feature ranges¶

Our XGBoost implementation requires the user to specify the minimum and maximum values of each feature. Here, we extract this information from the training data and assume it is also representative of the test data.

In [23]:
def get_feature_range(col):
    return (col.min(), col.max())
    
feature_ranges = []
for col in X_train:
    feature_ranges.append(get_feature_range(X_train[col]))
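
As a quick check, the computed ranges can be printed alongside the feature names. A small illustrative sketch:

# Illustrative: print the (min, max) range computed for each feature
for name, (low, high) in zip(X_train.columns, feature_ranges):
    print(f'{name}: [{low}, {high}]')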

2.2. Initialize a PlainXGBoost object¶

We initialize a PlainXGBoost object using the XGBoost model trained above. This object holds the XGBoost weights in plaintext, and will later be encrypted and used for prediction over encrypted data.

In [24]:
hyper_params = pyhelayers.PlainModelHyperParams()
hyper_params.feature_ranges = feature_ranges
hyper_params.grep = 4
hyper_params.frep = 1
plain_xgb = pyhelayers.PlainModel.create(hyper_params, [model_path])

2.3 Define HE run requirements¶

These requirements specify how the HE encryption should be configured. Here, we require the HE encryption to be done with the HEaaN CKKS encryption scheme.

In [25]:
he_run_req = pyhelayers.HeRunRequirements()
he_run_req.set_he_context_options([pyhelayers.HeaanContext()])

2.4 Compile the plain model and HE run requirements into HE profile¶

The compilation produces an HE profile which holds encryption-specific parameters.

In [26]:
profile = pyhelayers.HeModel.compile(plain_xgb, he_run_req)

2.5 Initialize the HE context¶

Once the HE profile is ready, we use it to initialize the context. If the run_with_gpu flag is set, we update the he_context to use a GPU device by default. Otherwise, we use the CPU as usual.

In [27]:
he_context = pyhelayers.HeModel.create_context(profile)
if run_with_gpu:
    he_context.set_default_device(pyhelayers.DeviceType.DEVICE_GPU)
else:
    he_context.set_default_device(pyhelayers.DeviceType.DEVICE_CPU)

2.6. Initialize the XGBoost model and attach output storage¶

We initialize the HE XGBoost model using the plain model created above. We also attach an output directory to the model. This directory will be used to store the encrypted trees and load them on demand upon prediction. Using output storage prevents out-of-memory errors in case the encrypted XGBoost model is too large to hold in memory.

In [28]:
xgb = plain_xgb.get_empty_he_model(he_context)
storage_dir = os.path.join('outputs', 'xgb_storage')
os.makedirs(storage_dir, exist_ok=True)
fstorage = pyhelayers.FileStorage(storage_dir, create=True)
xgb.attach_output_storage(fstorage)

2.7 Encrypt the XGBoost model¶

In [29]:
xgb.encode_encrypt(plain_xgb, profile)
print('FHE XGBoost model encrypted and initialized')
FHE XGBoost model encrypted and initialized

2.8 Get an IoProcessor from the HE XGBoost model¶

The IoProcessor object will be used to encrypt and decrypt the input and output of the FHE prediction.

In [30]:
iop = xgb.create_io_processor()

2.9. Encrypt the test samples¶

We encrypt the test samples using the IoProcessor created above.

In [31]:
X_test_enc = pyhelayers.EncryptedData(he_context)
iop.encode_encrypt_inputs_for_predict(X_test_enc, [X_test])
print('input data encrypted')
input data encrypted

2.10 Flush the XGBoost model to the output storage¶

The flush_to_storage() function stores the XGBoost metadata in the output storage directory.

In [32]:
xgb.flush_to_storage()
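
To get a sense of how large the encrypted model is on disk, the storage directory can be measured after flushing. A minimal sketch using only the standard library (illustrative, not part of the pyhelayers API):

# Illustrative: total size of the encrypted model files in the storage directory
total_bytes = sum(os.path.getsize(os.path.join(root, f))
                  for root, _, files in os.walk(storage_dir) for f in files)
print('encrypted model storage size: %.1f MB' % (total_bytes / 2**20))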

2.11 Load the XGBoost model¶

When we encrypted the model and flushed it to the file storage, all the encrypted trees were stored in the folder we specified. This folder can now be moved to a cloud server and used for prediction. Here we simply create a new XGBoost model and attach the saved folder to it. Upon prediction, the encrypted trees will be loaded on demand from the file system.

In [33]:
xgb_server = plain_xgb.get_empty_he_model(he_context)
xgb_server.attach_input_storage(fstorage)

2.12 Run prediction over the encrypted data¶

We perform FHE prediction on the encrypted test samples, using the encrypted XGBoost model. The resulting predictions are encrypted as well, and will next be decrypted and compared to the expected labels.

In [34]:
res = pyhelayers.EncryptedData(he_context)
xgb_server.predict(res, X_test_enc)
print('prediction ready')
prediction ready

Step 3. Decrypt the prediction results¶

We decrypt the prediction results and then convert the predicted float output values to integer labels.

In [35]:
res_plain = iop.decrypt_decode_output(res)
res_plain = np.where(res_plain > 0, 1, 0)
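
Before computing the F1 score, the FHE predictions can also be compared directly against the plain model's predictions. A small sketch, assuming both arrays flatten to one label per test sample:

# Illustrative: fraction of test samples where the FHE prediction agrees with
# the plain XGBoost prediction (assumes one label per sample after flattening)
agreement = np.mean(res_plain.flatten() == np.array(plain_xgb_preds).flatten())
print('agreement with plain XGBoost predictions:', agreement)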

3.1. Evaluate the FHE prediction¶

We compute the F1 score of the FHE prediction and verify that it is very close to the F1 score of the plain prediction.

In [36]:
f1_fhe = f1_score(y_test, res_plain)
print('FHE XGBoost f1 score =', f1_fhe)
assert(f1_fhe >= f1_plain - 0.1)
FHE XGBoost f1 score = 0.5494344057587777
In [37]:
print("RAM usage:", get_used_ram(), "MB")
RAM usage: 1730.9765625 MB

3.2 Remove the output storage directory¶

We remove the directory that was used to store the encrypted trees of the XGBoost model.

In [38]:
shutil.rmtree('outputs')

Citations¶

[1] Kohavi, R., Becker, B.: UCI Machine Learning Repository - Adult dataset (1996), https://archive.ics.uci.edu/ml/datasets/adult.

[2] Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.