Разработка рекомендательной системы: реализация микросервисов для автоматической обработки и интеллектуального анализа данных

Разработка универсальной рекомендательной системы. Реализация микросервисов автоматической обработки и интеллектуального анализа данных. Основные требования к модулю анализа для универсальной рекомендательной системы. Алгоритмы и методы решения задач.

Рубрика Программирование, компьютеры и кибернетика
Вид дипломная работа
Язык русский
Дата добавления 01.08.2017
Размер файла 2,9 M

Отправить свою хорошую работу в базу знаний просто. Используйте форму, расположенную ниже

Студенты, аспиранты, молодые ученые, использующие базу знаний в своей учебе и работе, будут вам очень благодарны.

prediction2 = 0.2 distance_matrix. set_value (pr ["_id"], r ["_id"], 1-prediction2) distance_matrix. set_value (r ["_id"], pr ["_id"], 1 - prediction2) if ObjectId (i)! =r2 ["_id"]:

pr2 = col. find_one ({"_id": ObjectId (i) }) prediction2 = dp. predict (self. select_features (fields, pr2), self. select_features (fields, r2), fields) if prediction2 > bound:

prediction2 = 0.9 else:

prediction2 = 0.2 distance_matrix. set_value (pr2 ["_id"], r2 ["_id"], 1-prediction2) distance_matrix. set_value (r2 ["_id"], pr2 ["_id"], 1 - prediction2) if prediction > bound:

prediction = 0.9 else:

prediction = 0.2 distance_matrix. set_value (r ["_id"], r ["_id"], 0) distance_matrix. set_value (r2 ["_id"], r2 ["_id"], 0) distance_matrix. set_value (r ["_id"],r2 ["_id"],1-prediction) distance_matrix. set_value (r2 ["_id"], r ["_id"], 1-prediction) distance_matrix = distance_matrix. fillna (0) graphs_matrices [c] = distance_matrix pickle. dump (graphs_matrices, open (self. process+"_matrix_"+file_name, "wb")) return graphs_matrices else:

raise ValueError ("matrix with such name already exists") def new_distance_matrix_c (self,file_name, bound=0.6, c=0):

col = self. mongo [self. process] number_of_clusters = self. mongo ["params"]. find_one ({"process": self. process}, no_cursor_timeout=True) [ "clusters_count"] fields = self. mongo ["params"]. find_one ({"process": self. process}) ["fields"] dp = DuplicatesPredictor (self. model,self. redis,self. process) distance_matrix = pd. DataFrame () for r in col. find ({"cluster": c}, no_cursor_timeout=True):

for r2 in col. find ({"cluster": c}, no_cursor_timeout=True):

if r ["_id"]! =r2 ["_id"]:

prediction = dp. predict (self. select_features (fields,r),self. select_features (fields,r2),fields) print (prediction) columns = distance_matrix. columns. values if prediction>bound:

print (r ["title"], r2 ["title"]) print ("DUPLICATE") print (prediction) for i in columns:

if ObjectId (i)! =r ["_id"]:

pr = col. find_one ({"_id": ObjectId (i) }) prediction2 = dp. predict (self. select_features (fields, pr), self. select_features (fields, r), fields) if prediction2 > bound:

prediction2 = 0.9 else:

prediction2 = 0.2 distance_matrix. set_value (pr ["_id"], r ["_id"], 1-prediction2) distance_matrix. set_value (r ["_id"], pr ["_id"], 1 - prediction2) if ObjectId (i)! =r2 ["_id"]:

pr2 = col. find_one ({"_id": ObjectId (i) }) prediction2 = dp. predict (self. select_features (fields, pr2), self. select_features (fields, r2), fields) if prediction2 > bound:

prediction2 = 0.9 else:

prediction2 = 0.2 distance_matrix. set_value (pr2 ["_id"], r2 ["_id"], 1-prediction2) distance_matrix. set_value (r2 ["_id"], pr2 ["_id"], 1 - prediction2) if prediction > bound:

prediction = 0.9 else:

prediction = 0.2 distance_matrix. set_value (r ["_id"], r ["_id"], 0) distance_matrix. set_value (r2 ["_id"], r2 ["_id"], 0) distance_matrix. set_value (r ["_id"],r2 ["_id"],1-prediction) distance_matrix. set_value (r2 ["_id"], r ["_id"], 1-prediction) distance_matrix = distance_matrix. fillna (0) try:

graphs_matrices = GraphFormation. load_distance_frame (file_name) except:

graphs_matrices = {} file_name = self. process+"_matrix_"+file_name graphs_matrices [c] = distance_matrix pickle. dump (graphs_matrices, open (file_name, "wb")) return graphs_matrices @staticmethod def load_distance_matrix (file):

df = pd. read_pickle (file) #df = df. fillna (0) return pd. DataFrame. as_matrix (df) @staticmethod def load_distance_frame (file):

dict = pickle. load (open (file, "rb")) return dict def select_features (self, fields, dict):

keys = {f ["name"] for f in fields} return {k: dict [k] for k in set (keys) & set (dict. keys ()) }

grouping_graph.py

from scipy.cluster import hierarchy
from scipy.cluster.hierarchy import fcluster
from scipy.spatial.distance import squareform
import pickle
import pandas as pd
from bson.objectid import ObjectId
from matplotlib import pyplot as plt


class GraphGrouping:
    """Groups near-duplicate records by single-linkage hierarchical clustering
    of a precomputed pairwise distance matrix, and reads those groups back."""

    def __init__(self, mongo, process):
        self.mongo = mongo      # pymongo database handle
        self.process = process  # process id, also the source collection name

    def group_duplicates(self, name, bound, matrix):
        """Cluster each per-cluster distance matrix and persist the groups.

        Each group of object ids is inserted into the collection
        ``<process>_group_graph_<name>`` together with its cluster number.

        :param name: suffix of the output collection name
        :param bound: cophenetic-distance cut-off passed to fcluster
        :param matrix: path to the pickled dict {cluster: DataFrame} of distances
        """
        data = self.mongo[self.process]
        # `with` closes the file instead of leaking the handle; the local is
        # no longer called `dict` (shadowed builtin) or `matrix` (parameter).
        with open(matrix, "rb") as fh:
            matrices = pickle.load(fh)
        for g in range(len(matrices)):
            if len(matrices[g]) == 0:
                continue
            df = matrices[g]
            # Sort rows and columns identically so position i on both axes
            # refers to the same object id. reindex_axis() was removed in
            # pandas 0.25; reindex(..., axis=...) is the replacement.
            df = df.reindex(sorted(df.columns), axis=1)
            df = df.reindex(sorted(df.index), axis=0)
            # DataFrame.as_matrix() was removed in pandas 1.0.
            dist = df.to_numpy()
            print(dist)
            ids = df.columns.values
            # hierarchy.single() expects a *condensed* distance matrix; a
            # square matrix would be treated as raw observations and distances
            # recomputed, so convert first (checks=False tolerates the stored
            # float noise on the diagonal/symmetry).
            Z = hierarchy.single(squareform(dist, checks=False))
            c = fcluster(Z, bound, criterion='distance')
            # One bucket per distinct flat-cluster label.
            clusters = [[] for _ in set(c)]
            for i, cluster in enumerate(c):
                print(cluster)
                obj = data.find_one({"_id": ObjectId(ids[i])})
                print(obj)
                # fcluster labels are 1-based.
                clusters[cluster - 1].append(obj["_id"])
            out = self.mongo[self.process + "_group_graph_" + name]
            for objs in clusters:
                # Collection.insert() was removed in pymongo 4.
                out.insert_one({"cluster": g, "group": objs})

    def get_duplicates_groups(self, group_name, cluster):
        """Return the documents of every duplicate group stored for *cluster*.

        :param group_name: name of the groups collection written by
            group_duplicates
        :param cluster: cluster number (coerced to int for the $match stage)
        :return: list of lists of source documents, one list per group
        """
        objects = []
        print(self.mongo)
        pipeline = [
            {"$match": {"cluster": int(cluster)}},
            {"$lookup": {"from": self.process, "foreignField": "_id",
                         "localField": "group", "as": "fgroup"}},
        ]
        for o in self.mongo[group_name].aggregate(pipeline):
            print(o)
            objects.append(o["fgroup"])
        return objects

Размещено на Allbest.ru


Подобные документы

Работы в архивах красиво оформлены согласно требованиям ВУЗов и содержат рисунки, диаграммы, формулы и т.д.
PPT, PPTX и PDF-файлы представлены только в архивах.
Рекомендуем скачать работу.