Разработка модели обнаружения компрометации банковских транзакций
Разработка комплексных подходов обнаружения скомпрометированных банковских транзакций на основании ряда их признаков с помощью методов машинного обучения и других аналитических моделей. Анализ банковских операций, включая все их виды и особенности.
Рубрика | Программирование, компьютеры и кибернетика |
Вид | дипломная работа |
Язык | русский |
Дата добавления | 07.12.2019 |
Размер файла | 4,0 M |
Отправить свою хорошую работу в базу знаний просто. Используйте форму, расположенную ниже
Студенты, аспиранты, молодые ученые, использующие базу знаний в своей учебе и работе, будут вам очень благодарны.
for i, v in enumerate(pvs):
ax.text(i - 0.2, v + 0.01, str(round(v, 3)), color='black', fontsize=8)
plt.show()
return imp_cols
def get_train_test_dataset(df_array, test_percent, classname):
    """Stratified random split of several parallel DataFrames.

    The same row *positions* are sampled from every DataFrame in
    ``df_array`` (they are parallel encodings of the same transactions),
    separately for each class so the fraud/non-fraud ratio is preserved.

    Parameters
    ----------
    df_array : list of pd.DataFrame
        Parallel datasets with identical row order.
    test_percent : float
        Fraction of each class placed into the first returned list.
        NOTE(review): the caller passes ``train_rate`` here, so despite the
        name this is effectively the *train* share.
    classname : str
        Name of the binary (0/1) class column.

    Returns
    -------
    (results_train, results_test)
        Two lists of DataFrames, one entry per input DataFrame.
    """
    flen = len(df_array[0].loc[df_array[0][classname] == 1])
    nflen = len(df_array[0].loc[df_array[0][classname] == 0])
    random.seed()
    # Positional picks, shared across all parallel DataFrames.
    nf_pick = random.sample(range(nflen), int(round(nflen * test_percent)))
    f_pick = random.sample(range(flen), int(round(flen * test_percent)))
    # Build the complement by *position*. The original dropped rows by index
    # label (no_fraud.iloc[r].axes[0]), which removes extra rows whenever the
    # index contains duplicate labels.
    nf_pick_set = set(nf_pick)
    f_pick_set = set(f_pick)
    nf_rest = [j for j in range(nflen) if j not in nf_pick_set]
    f_rest = [j for j in range(flen) if j not in f_pick_set]
    results_train = []
    results_test = []
    for df in df_array:
        no_fraud = df.loc[df[classname] == 0]
        fraud = df.loc[df[classname] == 1]
        results_train.append(pd.concat([no_fraud.iloc[nf_pick], fraud.iloc[f_pick]]))
        results_test.append(pd.concat([no_fraud.iloc[nf_rest], fraud.iloc[f_rest]]))
    return results_train, results_test
def get_smote_over_sampling(x_df, y_df, fraud_percent, classname):
    """Oversample the minority (fraud) class with SMOTE.

    ``fraud_percent`` is the desired fraud share of the resampled data; it
    is converted into the minority/majority ratio SMOTE expects.
    Returns ``[x_resampled_df, y_resampled_df]`` keeping the original
    feature column names and the given class column name.
    """
    minority_to_majority = fraud_percent / (1 - fraud_percent)
    oversampler = SMOTE(sampling_strategy=minority_to_majority)
    x_resampled, y_resampled = oversampler.fit_resample(x_df, y_df)
    return [
        pd.DataFrame(data=x_resampled, columns=x_df.columns.values),
        pd.DataFrame(data=y_resampled, columns=[classname]),
    ]
def multiple_result(df, model_str, print_results=False):
    """Aggregate per-iteration scores of one model into means.

    Parameters
    ----------
    df : pd.DataFrame
        One row per iteration; expected columns: 'Precision', 'Recall',
        'F1', 'Accuracy', 'TP', 'TN', 'FP', 'FN', 'HSS'.
    model_str : str
        Model name used in the printed header and the plot title.
    print_results : bool
        When True, print the means and draw box plots of every metric.

    Returns
    -------
    tuple
        (precision, recall, f1, accuracy, hss, tp, tn, fp, fn) means.
        Note: accuracy is returned but intentionally never printed.
    """
    pr_mean = df['Precision'].mean()
    rec_mean = df['Recall'].mean()
    f1_mean = df['F1'].mean()
    acc_mean = df['Accuracy'].mean()
    tp_mean = df['TP'].mean()
    tn_mean = df['TN'].mean()
    fp_mean = df['FP'].mean()
    fn_mean = df['FN'].mean()
    hss_mean = df['HSS'].mean()
    if print_results:
        print('\n---' + model_str + '---\n')
        # Box plots: quality scores on the top row, confusion counts below.
        plt.suptitle(model_str)
        plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.6, hspace=0.35)
        plt.subplot(2, 4, 1)
        plt.ylabel('Score')
        plt.boxplot(df['Precision'].tolist())
        plt.title('Precision')
        plt.subplot(2, 4, 2)
        plt.boxplot(df['Recall'].tolist())
        plt.title('Recall')
        plt.subplot(2, 4, 3)
        plt.boxplot(df['F1'].tolist())
        plt.title('F1')
        plt.subplot(2, 4, 4)
        plt.boxplot(df['HSS'].tolist())
        plt.title('HSS')
        plt.subplot(2, 4, 5)
        plt.ylabel('Number of transactions')
        plt.boxplot(df['TP'].tolist())
        plt.title('True positive')
        ax = plt.subplot(2, 4, 6)
        # TN counts are large and nearly constant; disable the axis offset
        # so tick labels stay absolute instead of "+1.99e5"-style.
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        plt.boxplot(df['TN'].tolist())
        plt.title('True negative')
        plt.subplot(2, 4, 7)
        plt.boxplot(df['FP'].tolist())
        plt.title('False positive')
        plt.subplot(2, 4, 8)
        plt.boxplot(df['FN'].tolist())
        plt.title('False negative')
        plt.show()
        plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
        # TP/FN also show their share of all actual positives.
        print('Precision mean: ' + str(round(pr_mean, 4)))
        print('Recall mean: ' + str(round(rec_mean, 4)))
        print('F1 mean: ' + str(round(f1_mean, 4)))
        print('HSS mean: ' + str(round(hss_mean, 4)))
        print('True positive mean: ' + str(round(tp_mean, 4)) + ' | ' + str(round(tp_mean / (tp_mean + fn_mean), 2)))
        print('True negative mean: ' + str(round(tn_mean, 4)))
        print('False positive mean: ' + str(round(fp_mean, 4)))
        print('False negative mean: ' + str(round(fn_mean, 4)) + ' | ' + str(round(fn_mean / (tp_mean + fn_mean), 2)))
    return pr_mean, rec_mean, f1_mean, acc_mean, hss_mean, tp_mean, tn_mean, fp_mean, fn_mean
def test_model(model, model_name, x_train, y_train, x_test, y_test, show_reports, show_roc=False):
    """Fit *model* on the train split, score it on the test split, time both.

    Parameters
    ----------
    model : estimator with fit / predict / predict_proba.
    model_name : str
        Used in report titles; a name containing 'Decision tree' also
        triggers a graphviz .dot export of the fitted tree.
    show_reports : bool
        Print the classification report, AUC and HSS, and draw the ROC curve.
    show_roc : bool
        Accepted for interface compatibility; not used here (the ROC plot is
        controlled by show_reports).

    Returns
    -------
    tuple
        (precision, recall, f1, accuracy, tn, tp, fp, fn, hss,
         duration_fit, duration_predict, fitted_model) — callers take the
        fitted model via index [11].
    """
    y_train = y_train.values.ravel()
    y_test = y_test.values.ravel()
    start_time = time.time()
    model.fit(x_train, y_train)
    duration_fit = time.time() - start_time
    start_time = time.time()
    y_res = model.predict(x_test)
    # Stop the prediction clock here: the original kept it running through
    # the ROC/AUC computation and plotting below, inflating duration_predict.
    duration_predict = time.time() - start_time
    y_pred_proba = model.predict_proba(x_test)[::, 1]
    fpr, tpr, threshold = roc_curve(y_test, y_pred_proba)
    auc = skl.metrics.roc_auc_score(y_test, y_pred_proba)
    if show_reports:
        plt.plot(fpr, tpr, label="auc=" + str(auc))
        plt.title('ROC Curve ' + model_name, fontsize=12)
        plt.legend(loc=4)
        plt.xlabel('False Positive Rate', fontsize=12)
        plt.ylabel('True Positive Rate', fontsize=12)
        plt.show()
    precision_sc, recall_sc, f1, accuracy_sc = \
        get_model_scores(y_res, y_test, model_name,
                         show_report=show_reports)
    tn_sc, tp_sc, fp_sc, fn_sc, hss = get_conf_matrix_data(y_res, y_test, print_data=show_reports)
    if show_reports:
        # Merged the two consecutive identical `if show_reports:` checks.
        print('AUC: ' + str(round(auc, 4)))
        print('HSS: ', hss)
    if 'Decision tree' in model_name:
        # Export the fitted tree for later graphviz rendering.
        skl.tree.export_graphviz(model, out_file=save_to_path + model_name + '.dot', class_names=['No fraud', 'Fraud'],
                                 filled=True)
    return precision_sc, recall_sc, f1, accuracy_sc, tn_sc, tp_sc, fp_sc, fn_sc, hss, duration_fit,\
        duration_predict, model
def get_model_scores(y_pred, y_act, model_str, show_roc=False, show_report=False):
    """Compute precision / recall / F1 / accuracy for a binary prediction.

    Parameters
    ----------
    y_pred, y_act : predicted and actual 0/1 labels.
    model_str : str
        Name used in the printed header and the ROC plot title.
    show_roc : bool
        Draw a ROC curve from the hard labels.
    show_report : bool
        Print sklearn's classification report plus the rounded scores.

    Returns
    -------
    (precision, recall, f1, accuracy)
    """
    precision_sc = precision_score(y_act, y_pred)
    recall_sc = recall_score(y_act, y_pred)
    accuracy_sc = accuracy_score(y_act, y_pred)
    f1_sc = f1_score(y_act, y_pred)
    if show_report:
        print('\n' + model_str + '\n')
        print(classification_report(y_act, y_pred))
        print('Precision: ' + str(round(precision_sc, 4)))
        print('Recall: ' + str(round(recall_sc, 4)))
        print('F1: ' + str(round(f1_sc, 4)))
        print('Accuracy: ' + str(round(accuracy_sc, 4)))
        # Dropped the original get_conf_matrix_data(y_pred, y_act, False)
        # call: with print_data=False it computed the confusion matrix and
        # discarded the result — a pure no-op.
    if show_roc:
        # NOTE(review): ROC built from hard 0/1 labels yields only a
        # 3-point curve; pass probabilities to a caller-side plot for a
        # real curve.
        fpr, tpr, threshold = roc_curve(y_act, y_pred)
        plt.title('ROC Curve ' + model_str, fontsize=12)
        plt.plot(fpr, tpr, linewidth=2)
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlabel('False Positive Rate', fontsize=12)
        plt.ylabel('True Positive Rate', fontsize=12)
        plt.axis([-0.01, 1, 0, 1])
        plt.show()
    return precision_sc, recall_sc, f1_sc, accuracy_sc
def get_conf_matrix_data(y_pred, y_act, print_data=False):
    """Return (tn, tp, fp, fn, hss) for a binary prediction.

    ``labels=[0, 1]`` pins the confusion matrix to 2x2 even when one class
    is absent from the data — the original unqualified indexing raised an
    IndexError in that case.
    """
    cm = confusion_matrix(y_act, y_pred, labels=[0, 1])
    tn_sc, tp_sc, fp_sc, fn_sc = cm[0][0], cm[1][1], cm[0][1], cm[1][0]
    # Heidke's Skill Score; defined as 0.0 when the denominator degenerates
    # (e.g. every sample in a single class).
    denom = (tp_sc + fn_sc) * (fn_sc + tn_sc) + (tp_sc + fp_sc) * (fp_sc + tn_sc)
    hss = 2 * (tp_sc * tn_sc - fp_sc * fn_sc) / denom if denom else 0.0
    if print_data:
        print('True Negative: ', str(tn_sc))
        print('True Positive: ', str(tp_sc))
        print('False Positive: ', str(fp_sc))
        print('False Negative: ', str(fn_sc))
    return tn_sc, tp_sc, fp_sc, fn_sc, hss
def split_variable_by_percentile(array, class_number, var_name):
    """Discretize *array* into ``class_number`` equal-frequency bins.

    Bin ``i`` covers values in ``(p_{i-1}, p_i]`` where ``p_i`` is the
    ``round(100 / class_number * (i + 1), 1)``-th percentile; bin 0 also
    takes everything below ``p_0``.

    Returns a one-column DataFrame named ``var_name`` with the bin index
    of every input value.
    """
    percents = [round(100 / class_number * (i + 1), 1) for i in range(class_number)]
    bounds = np.percentile(a=array, q=percents)
    # searchsorted(side='left') returns the first bound >= value — exactly
    # the original "bounds[i-1] < val <= bounds[i]" scan, but in
    # O(n log k) instead of O(n * k).
    classes = np.searchsorted(bounds, np.asarray(array), side='left')
    # Clip any float-noise value above the top (100th-percentile) bound into
    # the last bin; the original skipped such values entirely and only
    # printed 'SPLITTING ERROR', silently misaligning the output length.
    classes = np.minimum(classes, class_number - 1)
    return pd.DataFrame(columns=[var_name], data=classes)
def create_grouped_dataset(df, class_number):
    """Build (or load from cache) a percentile-binned copy of *df*.

    Every feature column is replaced by a '<name>_Class' column holding its
    percentile bin index; the original 'Class' column is appended unchanged.
    The result is cached as a CSV so repeated runs skip the binning.
    """
    cache_file = save_to_path + 'Classed_' + str(class_number) + '.csv'
    if os.path.isfile(cache_file):
        return pd.read_csv(cache_file, sep=';', index_col=0)
    print('Splitting started')
    class_column = df['Class']
    features = df.drop('Class', axis=1)
    var_columns = [
        split_variable_by_percentile(array=features[name], class_number=class_number,
                                     var_name=name + '_Class')
        for name in features.columns.values
    ]
    var_columns.append(class_column)
    print('Splitting finished')
    result_ds = pd.concat(var_columns, axis=1, sort=False)
    result_ds.to_csv(cache_file, sep=';')
    return result_ds
def major_voting(x_test, rate):
    """Majority vote over per-model binary predictions.

    Parameters
    ----------
    x_test : iterable of per-transaction vote vectors (0/1 per base model).
    rate : number
        Minimum number of positive votes required to label a transaction
        as fraud (1); otherwise 0.

    Returns
    -------
    list of int — one 0/1 label per transaction.
    """
    # Comprehension replaces the manual append loop; the original
    # enumerate index was unused.
    return [1 if np.sum(votes) >= rate else 0 for votes in x_test]
# ---Downloading data------------------
data = pd.read_csv(data_path)
# ---Iterative process------------------
# One result row per (iteration, model); filled sequentially through `ln`.
lines_num = iterations_number * len(models_list)
model_score = pd.DataFrame(columns=['Model', 'Precision', 'Recall', 'F1', 'Accuracy', 'TN', 'TP', 'FP', 'FN',
                                    'HSS'],
                           index=range(lines_num))
ln = 0
# Percentile-binned variants of the dataset (20 and 30 bins per feature);
# cached to CSV by create_grouped_dataset.
classed_data_20 = create_grouped_dataset(df=data, class_number=20)
classed_data_30 = create_grouped_dataset(df=data, class_number=30)
# Feature selection via t-tests, done separately for each dataset variant.
imp = sample_t_tests(data, class_name, to_print=False)
imp_classed_20 = sample_t_tests(classed_data_20, class_name, to_print=False)
imp_classed_30 = sample_t_tests(classed_data_30, class_name, to_print=False)
print("Columns to use:")
print(imp)
print(imp_classed_20)
print(imp_classed_30)
# Main experiment loop: each iteration draws a fresh stratified train/test
# split, fits the enabled base models, then combines their 0/1 predictions
# with several meta-classifiers (voting, NB, LR, decision tree) — each also
# in a "without KNN" (WOKNN) variant.
for i in range(iterations_number):
    print('\nIteration ' + str(i) + ' started\n')
    start_time = time.time()
    # One shared positional split applied to the raw and both binned datasets.
    [train_dt, train_20_dt, train_30_dt], [test_dt, test_20_dt, test_30_dt] =\
        get_train_test_dataset([data, classed_data_20, classed_data_30], train_rate, class_name)
    x_train_dt = train_dt[imp]
    x_train_20_dt = train_20_dt[imp_classed_20]
    x_train_30_dt = train_30_dt[imp_classed_30]
    y_train_dt = train_dt[class_name]
    if train_rate == 1 or test_at_all:
        # Evaluate against the whole dataset instead of the held-out part.
        x_test = data[imp]
        x_test_20 = classed_data_20[imp_classed_20]
        x_test_30 = classed_data_30[imp_classed_30]
        y_test = data[class_name]
    else:
        x_test = test_dt[imp]
        x_test_20 = test_20_dt[imp_classed_20]
        x_test_30 = test_30_dt[imp_classed_30]
        y_test = test_dt[class_name]
    # ---Data analysis---
    # Tuples accumulating per-model prediction vectors; the *_WOKNN
    # variants collect every model except KNN.
    x_train_array = ()
    x_test_array = ()
    x_train_WOKNN_array = ()
    x_test_WOKNN_array = ()
    # Logistic regression
    if logistic_regression_model:
        # SMOTE oversampling
        mod_start_time = time.time()
        print('Logistic regression start')
        x_train_smote, y_train_smote = get_smote_over_sampling(x_train_30_dt, y_train_dt, 0.01, class_name)
        log_reg = skl.linear_model.LogisticRegression(solver='liblinear', tol=0.001)
        # test_model returns the fitted model at index [11].
        log_reg = test_model(model=log_reg, model_name='Logistic regression',
                             x_train=x_train_smote, y_train=y_train_smote, x_test=x_test_30, y_test=y_test,
                             show_reports=False)[11]
        x_train_log_reg = log_reg.predict(x_train_30_dt)
        x_test_log_reg = log_reg.predict(x_test_30)
        x_train_array = x_train_array + (x_train_log_reg,)
        x_test_array = x_test_array + (x_test_log_reg,)
        x_train_WOKNN_array = x_train_WOKNN_array + (x_train_log_reg,)
        x_test_WOKNN_array = x_test_WOKNN_array + (x_test_log_reg,)
        print('Finished - ' + str(time.time() - mod_start_time) + ' sec')
    # K nearest neighbours
    if k_nearest_model:
        # SMOTE oversampling
        print('KNN start')
        mod_start_time = time.time()
        x_train_smote, y_train_smote = get_smote_over_sampling(x_train_30_dt, y_train_dt, 0.01, class_name)
        knn = skl.neighbors.KNeighborsClassifier()
        knn = test_model(model=knn, model_name='KNN',
                         x_train=x_train_smote, y_train=y_train_smote,
                         x_test=x_test_30, y_test=y_test, show_reports=False)[11]
        x_train_knn = knn.predict(x_train_30_dt)
        x_test_knn = knn.predict(x_test_30)
        # KNN predictions are intentionally NOT added to the WOKNN tuples.
        x_train_array = x_train_array + (x_train_knn,)
        x_test_array = x_test_array + (x_test_knn,)
        print('Finished - ' + str(time.time() - mod_start_time) + ' sec')
    # Naive Bayes
    if naive_bayes_model:
        print('NB start')
        mod_start_time = time.time()
        gnb = GaussianNB()
        gnb = test_model(model=gnb, model_name='Gaussian NB',
                         x_train=x_train_20_dt, y_train=y_train_dt, x_test=x_test_20, y_test=y_test,
                         show_reports=False)[11]
        x_train_nb = gnb.predict(x_train_20_dt)
        x_test_nb = gnb.predict(x_test_20)
        x_train_array = x_train_array + (x_train_nb,)
        x_test_array = x_test_array + (x_test_nb,)
        x_train_WOKNN_array = x_train_WOKNN_array + (x_train_nb,)
        x_test_WOKNN_array = x_test_WOKNN_array + (x_test_nb,)
        print('Finished - ' + str(time.time() - mod_start_time) + ' sec')
    # Support Vector Machine
    if svm_model:
        print('SVM start')
        mod_start_time = time.time()
        # Calibration wrapper so LinearSVC exposes predict_proba.
        svm = CalibratedClassifierCV(base_estimator=LinearSVC(dual=False))
        x_train_smote, y_train_smote = get_smote_over_sampling(x_train_dt, y_train_dt, 0.005, class_name)
        svm = test_model(model=svm, model_name='SVM (Clear)',
                         x_train=x_train_smote, y_train=y_train_smote, x_test=x_test, y_test=y_test,
                         show_reports=False)[11]
        x_train_svm = svm.predict(x_train_dt)
        x_test_svm = svm.predict(x_test)
        x_train_array = x_train_array + (x_train_svm,)
        x_test_array = x_test_array + (x_test_svm,)
        x_train_WOKNN_array = x_train_WOKNN_array + (x_train_svm,)
        x_test_WOKNN_array = x_test_WOKNN_array + (x_test_svm,)
        print('Finished - ' + str(time.time() - mod_start_time) + ' sec')
    # Decision tree
    if decision_tree_model:
        print('Decision tree start')
        mod_start_time = time.time()
        dtc = skl.tree.DecisionTreeClassifier(max_depth=10, min_samples_split=6)
        dtc = test_model(model=dtc, model_name='Decision tree (Clear)',
                         x_train=x_train_dt, y_train=y_train_dt, x_test=x_test, y_test=y_test,
                         show_reports=False)[11]
        x_train_dtc = dtc.predict(x_train_dt)
        x_test_dtc = dtc.predict(x_test)
        x_train_array = x_train_array + (x_train_dtc,)
        x_test_array = x_test_array + (x_test_dtc,)
        x_train_WOKNN_array = x_train_WOKNN_array + (x_train_dtc,)
        x_test_WOKNN_array = x_test_WOKNN_array + (x_test_dtc,)
        print('Finished - ' + str(time.time() - mod_start_time) + ' sec')
    # AdaBoost
    if ada_boost_model:
        print('AdaBoost start')
        mod_start_time = time.time()
        dtc = skl.tree.DecisionTreeClassifier(max_depth=8, min_samples_split=2)
        adb = skl.ensemble.AdaBoostClassifier(n_estimators=80, base_estimator=dtc)
        adb = test_model(model=adb, model_name='AdaBoost',
                         x_train=x_train_dt, y_train=y_train_dt, x_test=x_test, y_test=y_test,
                         show_reports=False)[11]
        x_train_adb = adb.predict(x_train_dt)
        x_test_adb = adb.predict(x_test)
        x_train_array = x_train_array + (x_train_adb,)
        x_test_array = x_test_array + (x_test_adb,)
        x_train_WOKNN_array = x_train_WOKNN_array + (x_train_adb,)
        x_test_WOKNN_array = x_test_WOKNN_array + (x_test_adb,)
        print('Finished - ' + str(time.time() - mod_start_time) + ' sec')
    # Random forest
    if random_forest_model:
        print('RF start')
        mod_start_time = time.time()
        rfm = skl.ensemble.RandomForestClassifier(n_estimators=80, max_depth=15, min_samples_split=4)
        rfm = test_model(model=rfm, model_name='Random forest (Clear)',
                         x_train=x_train_dt, y_train=y_train_dt, x_test=x_test, y_test=y_test,
                         show_reports=False)[11]
        x_train_rfm = rfm.predict(x_train_dt)
        x_test_rfm = rfm.predict(x_test)
        x_train_array = x_train_array + (x_train_rfm,)
        x_test_array = x_test_array + (x_test_rfm,)
        x_train_WOKNN_array = x_train_WOKNN_array + (x_train_rfm,)
        x_test_WOKNN_array = x_test_WOKNN_array + (x_test_rfm,)
        print('Finished - ' + str(time.time() - mod_start_time) + ' sec')
    # Stack the per-model prediction vectors column-wise: one row per
    # transaction, one column per base model.
    x_train_final = np.array(x_train_array).T
    x_test_final = np.array(x_test_array).T
    x_train_WOKNN_final = np.array(x_train_WOKNN_array).T
    x_test_WOKNN_final = np.array(x_test_WOKNN_array).T
    np.savetxt(fname=save_to_path + 'Test_WOKNN.csv', X=x_test_WOKNN_final, delimiter=';')
    np.savetxt(fname=save_to_path + 'Test_WKNN.csv', X=x_test_final, delimiter=';')
    # final_ds = np.array(
    #     (x_test_log_reg, x_test_knn, x_test_nb, x_test_svm, x_test_dtc, x_test_adb, x_test_rfm, y_test.values)).T
    # np.savetxt(fname=save_to_path + 'FinalData.csv', X=final_ds, delimiter=';')
    # VOTING CLASSIFIER
    # A transaction is fraud when at least 4 base models vote fraud.
    y_predicted_final = major_voting(x_test_final, 4)
    final_scores = get_model_scores(y_pred=y_predicted_final, y_act=y_test, model_str='FINAL Voting',
                                    show_roc=False, show_report=False)
    conf_matrix = get_conf_matrix_data(y_predicted_final, y_test, print_data=False)
    mv_results = final_scores + conf_matrix
    # Without KNN
    y_predicted_WOKNN_final = major_voting(x_test_WOKNN_final, 4)
    final_WOKNN_scores = get_model_scores(y_pred=y_predicted_WOKNN_final, y_act=y_test, model_str='FINAL Voting',
                                          show_roc=False, show_report=False)
    conf_matrix_WOKNN = get_conf_matrix_data(y_predicted_WOKNN_final, y_test, print_data=False)
    mv_results_WOKNN = final_WOKNN_scores + conf_matrix_WOKNN
    # NAIVE BAYES CLASSIFIER (meta-model trained on the base predictions)
    nb_classifier = GaussianNB()
    final_nb_results = test_model(model=nb_classifier, model_name='Final model NB',
                                  x_train=x_train_final, y_train=y_train_dt, x_test=x_test_final, y_test=y_test,
                                  show_reports=False, show_roc=False)
    final_nb_WOKNN_results = test_model(model=nb_classifier, model_name='Final model NB',
                                        x_train=x_train_WOKNN_final, y_train=y_train_dt, x_test=x_test_WOKNN_final, y_test=y_test,
                                        show_reports=False, show_roc=False)
    # LOGISTIC REGRESSION CLASSIFIER
    final_lr = skl.linear_model.LogisticRegression()
    final_lr_results = test_model(model=final_lr, model_name='Final model logistic regression',
                                  x_train=x_train_final, y_train=y_train_dt, x_test=x_test_final, y_test=y_test,
                                  show_reports=False, show_roc=False)
    final_lr_WOKNN_results = test_model(model=final_lr, model_name='Final model logistic regression',
                                        x_train=x_train_WOKNN_final, y_train=y_train_dt, x_test=x_test_WOKNN_final, y_test=y_test,
                                        show_reports=False, show_roc=False)
    # DECISION TREE
    final_dtc = skl.tree.DecisionTreeClassifier(max_depth=8, min_samples_leaf=2)
    final_dtc_results = test_model(model=final_dtc, model_name='Final model Decision tree',
                                   x_train=x_train_final, y_train=y_train_dt, x_test=x_test_final, y_test=y_test,
                                   show_reports=False, show_roc=False)
    final_dtc_WOKNN_results = test_model(model=final_dtc, model_name='Final model Decision tree',
                                         x_train=x_train_WOKNN_final, y_train=y_train_dt, x_test=x_test_WOKNN_final, y_test=y_test,
                                         show_reports=False, show_roc=False)
    # Record only the first 9 values (the score columns of model_score);
    # timings and the fitted model objects are dropped.
    model_score.loc[ln] = ('Major voting',) + mv_results[:9]
    ln += 1
    model_score.loc[ln] = ('Naive Bayes',) + final_nb_results[:9]
    ln += 1
    model_score.loc[ln] = ('Logistic regression',) + final_lr_results[:9]
    ln += 1
    model_score.loc[ln] = ('Decision tree',) + final_dtc_results[:9]
    ln += 1
    model_score.loc[ln] = ('Major voting W/O KNN',) + mv_results_WOKNN[:9]
    ln += 1
    model_score.loc[ln] = ('Naive Bayes W/O KNN',) + final_nb_WOKNN_results[:9]
    ln += 1
    model_score.loc[ln] = ('Logistic regression W/O KNN',) + final_lr_WOKNN_results[:9]
    ln += 1
    model_score.loc[ln] = ('Decision tree W/O KNN',) + final_dtc_WOKNN_results[:9]
    ln += 1
    print('Iteration finished\nDuration: ' + str(time.time() - start_time) + '\n')
# Aggregate the per-iteration rows of model_score into per-model means,
# splitting "with KNN" and "W/O KNN" model names into separate lists.
general_results = []
general_results_WOKNN = []
# print(model_score)
for mdl in models_list:
    if 'W/O' not in mdl:
        score_df = model_score.loc[(model_score['Model'] == mdl)]
        # Skip models that produced no rows (disabled via their flag).
        if len(score_df.index) != 0:
            # multiple_result(df=score_df, model_str=(mdl), print_results=True)
            general_results.append(
                (mdl,) + multiple_result(df=score_df, model_str=mdl, print_results=True))
for mdl in models_list:
    if 'W/O' in mdl:
        score_df = model_score.loc[(model_score['Model'] == mdl)]
        if len(score_df.index) != 0:
            # multiple_result(df=score_df, model_str=(mdl), print_results=True)
            general_results_WOKNN.append(
                (mdl,) + multiple_result(df=score_df, model_str=mdl, print_results=True))
# ---Summary bar charts for the models that include KNN---
# Each general_results entry is
# (name, precision, recall, f1, accuracy, hss, tp, tn, fp, fn).
fig = plt.figure(1)
bar_names = [k[0] for i, k in enumerate(general_results)]
index = np.arange(len(general_results))
bar_width = 0.2
opacity = 0.6
# Left subplot: quality scores per model.
ax = fig.add_subplot(1, 2, 1)
f1 = [k[3] for i, k in enumerate(general_results)]
rec = [k[2] for i, k in enumerate(general_results)]
pr = [k[1] for i, k in enumerate(general_results)]
hss_vals = [k[5] for i, k in enumerate(general_results)]
rects1 = ax.bar(index, f1, bar_width, color='b', alpha=opacity,
                label='F1')
rects2 = ax.bar(index + 2 * bar_width, rec, bar_width, color='r', alpha=opacity,
                label='Recall')
rects3 = ax.bar(index + 3 * bar_width, pr, bar_width, color='g', alpha=opacity,
                label='Precision')
rects4 = ax.bar(index + 1 * bar_width, hss_vals, bar_width, color='m', alpha=opacity,
                label='HSS')
ax.set_xlabel('Model')
ax.set_ylabel('Scores')
ax.legend()
ax.set_xticks(index + bar_width)
ax.set_xticklabels(bar_names, rotation=30)
# Right subplot: confusion-matrix counts (TN omitted — it dwarfs the rest).
ax2 = fig.add_subplot(1, 2, 2)
tp = [k[6] for i, k in enumerate(general_results)]
fp = [k[8] for i, k in enumerate(general_results)]
fn = [k[9] for i, k in enumerate(general_results)]
rects11 = ax2.bar(index, tp, bar_width, color='g', alpha=opacity,
                  label='TP')
rects31 = ax2.bar(index + bar_width, fp, bar_width, color='b', alpha=opacity,
                  label='FP')
rects41 = ax2.bar(index + 2 * bar_width, fn, bar_width, color='r', alpha=opacity,
                  label='FN')
ax2.set_xlabel('Model')
ax2.set_ylabel('Scores')
ax2.legend()
ax2.set_xticks(index + bar_width)
ax2.set_xticklabels(bar_names, rotation=30)
# WITHOUT KNN
# Same two-panel summary chart, for the "W/O KNN" model variants.
fig_woknn = plt.figure(2)
bar_names_woknn = [k[0] for i, k in enumerate(general_results_WOKNN)]
index_woknn = np.arange(len(general_results_WOKNN))
bar_width = 0.2
opacity = 0.6
ax_woknn = fig_woknn.add_subplot(1, 2, 1)
# Entry layout: (name, precision, recall, f1, accuracy, hss, tp, tn, fp, fn).
f1_woknn = [k[3] for i, k in enumerate(general_results_WOKNN)]
rec_woknn = [k[2] for i, k in enumerate(general_results_WOKNN)]
pr_woknn = [k[1] for i, k in enumerate(general_results_WOKNN)]
hss_vals_woknn = [k[5] for i, k in enumerate(general_results_WOKNN)]
rects1_woknn = ax_woknn.bar(index_woknn, f1_woknn, bar_width, color='b', alpha=opacity,
                            label='F1')
rects2_woknn = ax_woknn.bar(index_woknn + 2 * bar_width, rec_woknn, bar_width, color='r', alpha=opacity,
                            label='Recall')
rects3_woknn = ax_woknn.bar(index_woknn + 3 * bar_width, pr_woknn, bar_width, color='g', alpha=opacity,
                            label='Precision')
rects4_woknn = ax_woknn.bar(index_woknn + 1 * bar_width, hss_vals_woknn, bar_width, color='m', alpha=opacity,
                            label='HSS')
ax_woknn.set_xlabel('Model')
ax_woknn.set_ylabel('Scores')
ax_woknn.legend()
ax_woknn.set_xticks(index_woknn + bar_width)
ax_woknn.set_xticklabels(bar_names_woknn, rotation=30)
ax2_woknn = fig_woknn.add_subplot(1, 2, 2)
tp_woknn = [k[6] for i, k in enumerate(general_results_WOKNN)]
fp_woknn = [k[8] for i, k in enumerate(general_results_WOKNN)]
fn_woknn = [k[9] for i, k in enumerate(general_results_WOKNN)]
rects11_woknn = ax2_woknn.bar(index_woknn, tp_woknn, bar_width, color='g', alpha=opacity,
                              label='TP')
rects31_woknn = ax2_woknn.bar(index_woknn + bar_width, fp_woknn, bar_width, color='b', alpha=opacity,
                              label='FP')
rects41_woknn = ax2_woknn.bar(index_woknn + 2 * bar_width, fn_woknn, bar_width, color='r', alpha=opacity,
                              label='FN')
ax2_woknn.set_xlabel('Model')
ax2_woknn.set_ylabel('Scores')
ax2_woknn.legend()
ax2_woknn.set_xticks(index_woknn + bar_width)
ax2_woknn.set_xticklabels(bar_names_woknn, rotation=30)
fig_woknn.tight_layout()
plt.show()
Размещено на Allbest.ru
Подобные документы
Общее понятие, поддержка и основные свойства транзакций. Модели плоских транзакций и их хроники. Модель вложенных транзакций: сущность и примеры. Модель многоуровневых транзакций и рабочих потоков. Классификация различных систем обработки транзакций.
курсовая работа [1,2 M], добавлен 08.02.2011

Разработка учебного электронного пособия, в котором посредством интерактивного участия пользователь освоит механизмы осуществления удаленных банковских транзакций и методы их защиты. Разработка в среде Macromedia Flash MX, язык - ActionScript 2.0.
дипломная работа [3,3 M], добавлен 30.08.2010

Изучение методики учета банковских и финансовых операций в бюджетной организации. Описание программных средств и разработка автоматизированной информационной системы по учету банковских операций. Характеристика алгоритма и блок-схемы АИС организации.
дипломная работа [1,6 M], добавлен 24.09.2012

Исследование рынка банковских программ. Анализ эффективности различных рекомендательных алгоритмов. Обзор имеющихся подходов выработки рекомендаций. Архитектура разрабатываемой системы. Методы коллаборативной фильтрации. Использование контентных методов.
курсовая работа [678,2 K], добавлен 31.08.2016

Целостность БД как правильность и непротиворечивость ее содержимого на уровне отдельных объектов и операций и базы данных в целом. Понятие и содержание, выполнение и откат транзакции. Сервисные программные средства. Характерные свойства и черты ACID.
презентация [49,8 K], добавлен 19.08.2013

Анализ инцидентов информационной безопасности. Структура и классификация систем обнаружения вторжений. Разработка и описание сетей Петри, моделирующих СОВ. Расчет времени реакции на атакующее воздействие. Верификация динамической модели обнаружения атак.
дипломная работа [885,3 K], добавлен 17.07.2016

Формирование и предоставление бухгалтерской информации в электронном виде внешним контролирующим органам. Сущность комплекса задач по автоматизации учета банковских операций, ее реализация в программе "1С: Бухгалтерия". Особенности технологии автоматизации.
курсовая работа [1,2 M], добавлен 23.09.2016

Проектирование автоматизированной информационной системы для учета банковских операций в бюджетной организации на платформе 1C: Предприятие. Блок-схема алгоритма решения задачи. Инструкция пользователю, установка программы и контрольный пример ее работы.
дипломная работа [3,2 M], добавлен 23.09.2012

Обобщенная модель процесса обнаружения атак. Обоснование и выбор контролируемых параметров и программного обеспечения для разработки системы обнаружения атак. Основные угрозы и уязвимые места. Использование системы обнаружения атак в коммутируемых сетях.
дипломная работа [7,7 M], добавлен 21.06.2011Описание алгоритмического языка для программирования прикладных систем обработки данных. Проектирование базы данных для ведения банковских счетов юридических лиц. Разработка комплекса программ и средств взаимодействия с ЭВМ (меню). Листинг программы.
курсовая работа [76,1 K], добавлен 07.12.2011