Source code for lib.model

try:
  from . import data_processing
except:
  import data_processing
import pickle
import os
import sys
import numpy as np
from sklearn.model_selection import KFold

import xgboost as xgb
import lightgbm as lgb
import pandas as pd

import joblib

root = os.path.abspath(os.path.join(__file__ ,"../../.."))

[docs]class BinaryClassifier(): """ Binary Classifier Model """ def __init__(self): # Load Best Parameters bi_config = os.path.join(root, 'models','binary_classfier_parameters.pkl') with open(bi_config,'rb') as file: model_paramters = pickle.load(file) # Classifier Paramters self.binary_clf_parameters = model_paramters['reg_paramters'] # Decision Threshold self.threshold = model_paramters['threshold'] # Save-Load Model Path self.model_path = os.path.join(root, 'models/binary_classifier.xgb')
[docs] def fit(self, X, y): """ fit binary classifier model :type X: pandas DataFrame :param X: Training features :type y: pandas Series :param y: Training labels """ X_train, _, y_train, _ = data_processing.train_test_sample(X, y, 0.0, upsample_type = 'SMOTE', over_sampling = 0.5, under_sampling = 0.8) print('Training Binary Classifier Model') self.clf = xgb.XGBClassifier( **self.binary_clf_parameters, objective="binary:logistic", random_state=42) self.clf.fit(X_train, y_train)
[docs] def dump(self, path = None): """ export model to file :type path: string :param path: path to dump model, defaults to None """ if path is None: path = self.model_path joblib.dump(self.clf, path) print(f'Binary Classifier saved at {path}')
[docs] def load(self, path = None): """ laod model from file :type path: string :param path: path to dump model, defaults to None """ if path is None: path = self.model_path self.clf = joblib.load(path) print(f'Model Loaded from {path}')
[docs] def predict(self, X): """ predict on data :type X: pandas DataFrame :param X: data to predict :return: predictions :rtype: pandas Series """ proba = self.clf.predict_proba(X) return (proba[:,1] > self.threshold).astype(int)
[docs] def k_fold_prediction(self, X, y, n_splits = 5): """ performs train-predict k fold :type X: pandas DataFrame :param X: Training Features :type y: pandas Series :param y: Training labels :type n_splits: int :param n_splits: number of k splits, defaults to 5 :return: k fold predictions :rtype: pandas Series """ cv_df = pd.concat([X, y], axis = 1) predictions = pd.Series(index = cv_df.index, dtype=np.float64) kf = KFold(n_splits=n_splits) kf.get_n_splits(cv_df) for train_index, test_index in kf.split(cv_df): X_train, X_test = X.iloc[train_index], X.iloc[test_index] y_train = y.iloc[train_index] X_train, y_train = data_processing.syntetic_sampling(X_train, y_train, over_sampling = 0.5, under_sampling = 0.8) # ML Model clf = xgb.XGBClassifier( **self.binary_clf_parameters, objective="binary:logistic", random_state=42) clf.fit(X_train, y_train, verbose=False) # Predict Probability proba = clf.predict_proba(X_test) predictions.iloc[test_index] = proba[:,1] return (predictions > self.threshold).astype(int)
[docs]class MultiClassifier(): """ Multiclass Classifier Model """ def __init__(self,): multi_config = os.path.join(root, 'models', 'multiclass_classfier_parameters.pkl') with open(multi_config,'rb') as file: self.multi_clf_parameters = pickle.load(file) self.model_path = os.path.join(root, 'models/multiclass_classifier.lgb')
[docs] def fit(self, X, y): """ fit multiclass classifier model :type X: pandas DataFrame :param X: Training features :type y: pandas Series :param y: Training labels """ X_train, _, y_train, _= data_processing.train_test_sample(X, y, 0) X_train_p = X_train[X_train['binary_predictions'] == 1]\ .drop(columns = ['binary_predictions']) y_train_p = y_train[X_train['binary_predictions'] == 1] w1 = self.multi_clf_parameters['w1'] w2 = self.multi_clf_parameters['w2'] w3 = self.multi_clf_parameters['w3'] print('Training Binary Classifier Model') self.mclf = lgb.LGBMClassifier(class_weight = {0:w1, 1:w2, 2:w3}, silent=True) self.mclf.fit(X_train_p, y_train_p)
[docs] def dump(self, path = None): """ :type path: string :param path: path to dump model, defaults to None """ if path is None: path = self.model_path joblib.dump(self.mclf, path) print(f'Multiclass Classifier saved at {path}')
[docs] def load(self, path = None): """ laod model from file :type path: string :param path: path to dump model, defaults to None """ if path is None: path = self.model_path self.mclf = joblib.load( path) print(f'Model Loaded from {path}')
[docs] def predict(self, X): """ predict on data :type X: pandas DataFrame :param X: data to predict :return: predictions :rtype: pandas Series """ y_predictions = pd.Series(index = X.index, dtype = np.float64) mask = X['binary_predictions'] == 1 X_p = X[mask].drop(columns = ['binary_predictions']) # Predict on positive Class y_predictions[ mask] = self.mclf.predict(X_p) # Negative class remains negative y_predictions[~mask] = 0 return y_predictions