Effects of product network relationships on demand in Russian ecommerce
This research analyzes the relationships and values contained in the Product Recommendation Network and how they affect an e-commerce retailer's demand. We carried out an empirical analysis of the TV category of a major Russian e-commerce retailer.
Рубрика | Менеджмент и трудовые отношения |
Вид | дипломная работа |
Язык | английский |
Дата добавления | 27.08.2020 |
Размер файла | 3,4 M |
Отправить свою хорошую работу в базу знаний просто. Используйте форму, расположенную ниже
Студенты, аспиранты, молодые ученые, использующие базу знаний в своей учебе и работе, будут вам очень благодарны.
if "Купили более" in name:
salary_all_count = name.split()[2]
elif "за сегодня" in name and "покуп" in name:
salary_today_count = name.split()[0]
elif "за неделю" in name and "покуп" in name:
salary_week_count = name.split()[0]
print(
"ID:" + id + " Name:" + item_name + " Price:" + item_price + " Score:" + item_score + " Sale:" + item_sale + " salary all count:" + salary_all_count + " salary today count:" + salary_today_count + " salary week count:" + salary_week_count + " Reviews:" + reviews)
driver.close()
drivers.remove(driver)
data = ",".join(
[id, item_name, item_price, item_score, item_sale, salary_all_count, salary_today_count, salary_week_count,
reviews])
connect = sqlite3.connect("database.sqlite") # или :memory: чтобы сохранить в RAM
connect.cursor().execute("""INSERT INTO items VALUES (?,?)""", [id, data])
connect.commit()
connect.close()
return data + "," + type
def _release_driver(driver):
    """Close ``driver`` and remove it from the module-level ``drivers`` list."""
    driver.close()
    drivers.remove(driver)


def _wait_for_elements(driver, selector, log_tag, screenshot_path=None, retries=2):
    """Return the elements matching ``selector``, re-querying while none are found.

    The page is queried once up front and then up to ``retries`` more times,
    sleeping one second before each new query. ``log_tag`` is printed on every
    wait; when ``screenshot_path`` is given a screenshot is saved before each
    wait. The returned list is empty if nothing appeared.
    """
    elements = driver.find_elements_by_css_selector(selector)
    for _ in range(retries):
        if elements:
            break
        if screenshot_path is not None:
            driver.save_screenshot(screenshot_path)
        print(log_tag)
        time.sleep(1)
        # Query again: without this the wait could never observe new elements.
        elements = driver.find_elements_by_css_selector(selector)
    return elements


def _load_linked_items(elements, kind):
    """Call ``load`` for the query-stripped ``href`` of every element."""
    return [load(element.get_property("href").split("?")[0], kind) for element in elements]


def parse(url, pack, reload=0):
    """Scrape one product page and append the result as a row of a CSV file.

    The row contains the main product followed by every item found in the
    "recommends", "sponsored" and "also bought" widgets of the page, each
    obtained through ``load``. The row is appended to
    ``data/data<pack>.csv`` using ``;`` as delimiter.

    :param url: address of the product page.
    :param pack: suffix of the CSV file name the row is appended to.
    :param reload: number of restarts already performed for this URL. When the
        recommends or sponsored widget stays empty the page is parsed again
        from scratch while ``reload`` is at most 1; after that the widget is
        recorded as empty.
    :return: ``None``. Nothing is written when ``load`` yields ``None`` for
        the main product.
    """
    print("---MAIN---")
    driver = webdriver.Firefox(options=options)
    drivers.append(driver)
    driver.set_window_size(1366, 9000)  # because firefox not scroll to element
    driver.implicitly_wait(wait_time)  # seconds
    driver.get(url)
    main_data = load(url, "main")
    if main_data is None:
        # Do not leave the browser window open when the page is skipped.
        _release_driver(driver)
        return

    print("---RECOMMENDS---")
    recommends = _wait_for_elements(
        driver,
        '[data-widget="skuShelfCompare"]>div>div>div>div>div>div>a',
        "recWhile")
    if not recommends and reload <= 1:
        _release_driver(driver)
        parse(url, pack, reload + 1)
        return
    recommends_data = _load_linked_items(recommends, "recommends")

    print("---SPONSORED---")
    sponsored = _wait_for_elements(
        driver,
        '[data-widget="skuShelfGoods"][title="Спонсорские товары"] a',
        "sponsoredWhile",
        screenshot_path="sponsored_screen.png")
    if not sponsored and reload <= 1:
        _release_driver(driver)
        parse(url, pack, reload + 1)
        return
    sponsored_data = _load_linked_items(sponsored, "sponsored")

    print("---ALSO-BUYED---")
    also_buyed = driver.find_elements_by_css_selector(
        "#__nuxt>div>div.block-vertical>div:nth-child(6)>div>div:nth-child(2)>div>div:nth-child(4) a")
    also_buyed_data = _load_linked_items(also_buyed, "also_buy")

    _release_driver(driver)

    # newline="" is what the csv module expects from the file object; the
    # with-statement closes the file, so no explicit close() is needed.
    with open('data/data' + pack + '.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        writer.writerow([main_data] + recommends_data + sponsored_data + also_buyed_data)
def get_id(url):
    """Return the last non-empty piece of ``url`` after splitting on '-' and '/'."""
    pieces = [piece for piece in re.split(r'[\-/]', url) if piece != '']
    return pieces[-1]
# parse("https://www.ozon.ru/context/detail/id/154925584/", "test")
Appendix B
Code of clustering data
ozon_tv <- read.csv("ozon_finalv3.csv", header = TRUE, sep = ",")

# Clustering analysis for main products ----
library(dplyr)    # group_by(), summarise(), count(), ...
library(ISLR)     # loaded by the original script; not referenced in this section
library(cluster)  # daisy() for Gower dissimilarity, pam() for clustering
library(Rtsne)    # t-SNE embedding used for the cluster plot
library(ggplot2)  # plotting

# Keep only columns 2-8 of ozon_tv (column 1 and columns 9-17 are dropped)
# and turn the two text columns into factors so daisy() treats them as nominal.
main_ozon_tv <- ozon_tv[, c(-1, -9:-17)]
main_ozon_tv$Brand <- as.factor(main_ozon_tv$Brand)
main_ozon_tv$Product.Name <- as.factor(main_ozon_tv$Product.Name)

# Gower dissimilarity; column 3 of main_ozon_tv is log-transformed first.
main_ozon_dist <- daisy(main_ozon_tv, metric = "gower", type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(main_ozon_dist)

# Dissimilarities as a full square matrix
df_mat <- as.matrix(main_ozon_dist)

# Output the most similar pair: the smallest dissimilarity that is not the
# matrix minimum (the zero diagonal). The previous version searched for the
# second-largest value, which does not identify the most similar pair.
main_ozon_tv[which(df_mat == min(df_mat[df_mat != min(df_mat)]),
                   arr.ind = TRUE)[1, ], ]

# Choosing the number of clusters: average silhouette width for k = 2..12 (PAM)
k_values <- 2:12
sil_width <- rep(NA_real_, max(k_values))  # position 1 stays NA (k = 1 is not fitted)
for (k in k_values) {
  pam_fit <- pam(main_ozon_dist, diss = TRUE, k = k)
  sil_width[k] <- pam_fit$silinfo$avg.width
}

# Plot silhouette width (higher is better)
plot(1:12, sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(1:12, sil_width)

# Cluster interpretation: 7 clusters were chosen from the silhouette plot
pam_fit <- pam(main_ozon_dist, diss = TRUE, k = 7)

# Medoid (representative) product of each cluster
main_ozon_tv[pam_fit$medoids, ]

# Plotting the results: 2-D t-SNE embedding coloured by cluster
tsne_obj <- Rtsne(main_ozon_dist, is_distance = TRUE)
tsne_data <- tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(aes(x = X, y = Y), data = tsne_data) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
main_ozon_tv$segments <- tsne_data$cluster

# Per-segment averages of the main-product attributes
seg_main_ozon <- main_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_rating_main = mean(Rating_main),
    avg_price_main = mean(Price_main),
    avg_discout_main = mean(Discount_main),
    avg_review_main = mean(Reviews_main),
    avg_sales_main = mean(Sales_main)
  )

main_ozon_tv$Brand <- as.character(main_ozon_tv$Brand)
main_ozon_tv$Product.Name <- as.character(main_ozon_tv$Product.Name)

# NOTE(review): the data stay grouped by `segments`, so filter(n == max(n))
# compares counts within each segment only — confirm this is intended.
seg_main_sales <- main_ozon_tv %>%
  group_by(segments) %>%
  count(mean(Sales_main)) %>%
  filter(n == max(n))
seg_main_brand <- main_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n))

# The original line printed `seg_main`, an object that is never created and
# therefore stopped the script; print the per-segment summary instead.
seg_main_ozon

# Final cluster
seg_main_all <- main_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(Sales_main), mean(Price_main), mean(Rating_main),
        mean(Discount_main), mean(Reviews_main)) %>%
  filter(n == max(n))
main_ozon_tv

# Number of products per segment
main_ozon_tv %>%
  group_by(segments) %>%
  tally()
# Clustering analysis for also-viewed products ----
library(dplyr)    # group_by(), summarise(), count(), ...
library(ISLR)     # loaded by the original script; not referenced in this section
library(cluster)  # daisy() for Gower dissimilarity, pam() for clustering
library(Rtsne)    # t-SNE embedding used for the cluster plot
library(ggplot2)  # plotting

# Drop column 1, columns 4-8 and columns 14-17 of ozon_tv, then turn the two
# text columns into factors so daisy() treats them as nominal.
av_ozon_tv <- ozon_tv[, c(-1, -4:-8, -14:-17)]
av_ozon_tv$Brand <- as.factor(av_ozon_tv$Brand)
av_ozon_tv$Product.Name <- as.factor(av_ozon_tv$Product.Name)

# Gower dissimilarity; column 3 of av_ozon_tv is log-transformed first.
av_ozon_dist <- daisy(av_ozon_tv, metric = "gower", type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(av_ozon_dist)

# Dissimilarities as a full square matrix
av_df_mat <- as.matrix(av_ozon_dist)

# Output the most similar pair: the smallest dissimilarity that is not the
# matrix minimum (the zero diagonal). The previous version searched for the
# second-largest value, which does not identify the most similar pair.
av_ozon_tv[which(av_df_mat == min(av_df_mat[av_df_mat != min(av_df_mat)]),
                 arr.ind = TRUE)[1, ], ]

# Choosing the number of clusters: average silhouette width for k = 2..10 (PAM)
k_values <- 2:10
sil_width <- rep(NA_real_, max(k_values))  # position 1 stays NA (k = 1 is not fitted)
for (k in k_values) {
  pam_fit <- pam(av_ozon_dist, diss = TRUE, k = k)
  sil_width[k] <- pam_fit$silinfo$avg.width
}

# Plot silhouette width (higher is better)
plot(1:10, sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(1:10, sil_width)

# Cluster interpretation: 7 clusters were chosen from the silhouette plot
pam_fit <- pam(av_ozon_dist, diss = TRUE, k = 7)

# Medoid (representative) product of each cluster
av_ozon_tv[pam_fit$medoids, ]

# Plotting the results: 2-D t-SNE embedding coloured by cluster
av_tsne_obj <- Rtsne(av_ozon_dist, is_distance = TRUE)
av_tsne_obj <- av_tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(aes(x = X, y = Y), data = av_tsne_obj) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
av_ozon_tv$segments <- av_tsne_obj$cluster

# Per-segment averages of the also-viewed attributes
seg_av_ozon <- av_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_sales_av = mean(SAL_av),
    avg_price_av = mean(AP_av),
    avg_rating_av = mean(AR_av),
    avg_discount_av = mean(AD_av),
    avg_number_of_reviews_av = mean(ANR_av)
  )

av_ozon_tv$Brand <- as.character(av_ozon_tv$Brand)
av_ozon_tv$Product.Name <- as.character(av_ozon_tv$Product.Name)

# NOTE(review): the data stay grouped by `segments`, so filter(n == max(n))
# compares counts within each segment only — confirm this is intended.
seg_av_sales <- av_ozon_tv %>%
  group_by(segments) %>%
  count(mean(SAL_av)) %>%
  filter(n == max(n))
seg_av_brand <- av_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n))

# Final cluster
seg_av_all <- av_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(SAL_av), mean(AP_av), mean(AR_av), mean(AD_av)) %>%
  filter(n == max(n))
av_ozon_tv

# Number of products per segment
av_ozon_tv %>%
  group_by(segments) %>%
  tally()

# Join the two per-segment summaries on their only shared column, `segments`.
# NOTE(review): the segment labels come from two separate PAM runs and are
# matched by label only — confirm that segment k means the same in both.
seg_lm_ozon <- merge(seg_main_ozon, seg_av_ozon, by = "segments")

# Linear model on the per-segment averages (main + also-viewed)
library(car)
LM_main <- lm(avg_sales_main ~ avg_review_main + avg_rating_main + avg_sales_av + avg_price_av + avg_rating_av + avg_discount_av + avg_number_of_reviews_av, data = seg_lm_ozon)
summary(LM_main)
# Clustering analysis for co-purchased products ----
library(dplyr)    # group_by(), summarise(), count(), ...
library(ISLR)     # loaded by the original script; not referenced in this section
library(cluster)  # daisy() for Gower dissimilarity, pam() for clustering
library(Rtsne)    # t-SNE embedding used for the cluster plot
library(ggplot2)  # plotting

# Drop column 1 and columns 4-14 of ozon_tv; per the original note this also
# removes the binary co-purchase column.
cp_ozon_tv <- ozon_tv[, c(-1, -4:-14)]
cp_ozon_tv$Brand <- as.factor(cp_ozon_tv$Brand)
cp_ozon_tv$Product.Name <- as.factor(cp_ozon_tv$Product.Name)

# Gower dissimilarity; column 3 of cp_ozon_tv is log-transformed first.
cp_ozon_dist <- daisy(cp_ozon_tv, metric = "gower", type = list(logratio = 3))

# Check attributes to ensure the correct methods are being used
summary(cp_ozon_dist)

# Dissimilarities as a full square matrix
cp_df_mat <- as.matrix(cp_ozon_dist)

# Output the most similar pair: the smallest dissimilarity that is not the
# matrix minimum (the zero diagonal). The previous version searched for the
# second-largest value, which does not identify the most similar pair.
cp_ozon_tv[which(cp_df_mat == min(cp_df_mat[cp_df_mat != min(cp_df_mat)]),
                 arr.ind = TRUE)[1, ], ]

# Choosing the number of clusters: average silhouette width for k = 2..15 (PAM)
k_values <- 2:15
sil_width <- rep(NA_real_, max(k_values))  # position 1 stays NA (k = 1 is not fitted)
for (k in k_values) {
  pam_fit <- pam(cp_ozon_dist, diss = TRUE, k = k)
  sil_width[k] <- pam_fit$silinfo$avg.width
}

# Plot silhouette width (higher is better)
plot(1:15, sil_width,
     xlab = "Number of clusters",
     ylab = "Silhouette Width")
lines(1:15, sil_width)

# Cluster interpretation: 11 clusters were chosen from the silhouette plot.
# The fit must use the co-purchase dissimilarities; the previous version
# clustered av_ozon_dist (the also-viewed data) by mistake.
pam_fit <- pam(cp_ozon_dist, diss = TRUE, k = 11)

# Medoid (representative) product of each cluster
cp_ozon_tv[pam_fit$medoids, ]

# Plotting the results: 2-D t-SNE embedding coloured by cluster
cp_tsne_obj <- Rtsne(cp_ozon_dist, is_distance = TRUE)
cp_tsne_obj <- cp_tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(cluster = factor(pam_fit$clustering))

ggplot(aes(x = X, y = Y), data = cp_tsne_obj) +
  geom_point(aes(color = cluster))

# Save the segments inside our dataset
cp_ozon_tv$segments <- cp_tsne_obj$cluster

# Per-segment averages of the co-purchase attributes. The columns carry the
# `_cp` suffix (they were previously named `_av`, clashing with seg_av_ozon).
seg_cp_ozon <- cp_ozon_tv %>%
  group_by(segments) %>%
  summarise(
    avg_price_cp = mean(AP_cp),
    avg_rating_cp = mean(AR_cp),
    avg_number_of_reviews_cp = mean(ANR_cp)
  )

cp_ozon_tv$Brand <- as.character(cp_ozon_tv$Brand)
cp_ozon_tv$Product.Name <- as.character(cp_ozon_tv$Product.Name)

# NOTE(review): the data stay grouped by `segments`, so filter(n == max(n))
# compares counts within each segment only — confirm this is intended.
seg_cp_price <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(mean(AP_cp)) %>%
  filter(n == max(n))
seg_cp_brand <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(Brand) %>%
  filter(n == max(n))

# Final cluster
seg_cp_all <- cp_ozon_tv %>%
  group_by(segments) %>%
  count(Product.Name, mean(AP_cp), mean(AR_cp), mean(ANR_cp)) %>%
  filter(n == max(n))
cp_ozon_tv

# Number of products per segment
cp_ozon_tv %>%
  group_by(segments) %>%
  tally()
# First regression attempts ----
library(car)

# Control model: main-product sales on the main product's own attributes.
LM_control <- lm(
  formula = Sales_main ~ Price_main + Rating_main + Discount_main + Reviews_main,
  data = ozon_tv
)
summary(LM_control)

# Full models: every other column of ozon_tv is a predictor, except the
# ID, Brand, Product.Name and Co_purchase columns.
LM_sales <- lm(
  formula = Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_sales)

LM_rating <- lm(
  formula = Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_rating)

LM_reviews <- lm(
  formula = Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_tv
)
summary(LM_reviews)

# Log-log model of sales on discount.
# NOTE(review): log() of a zero value is -Inf, which lm() rejects — confirm
# that Sales_main and Discount_main are strictly positive.
LogM_sales <- lm(
  formula = log(Sales_main) ~ log(Discount_main),
  data = ozon_tv
)
summary(LogM_sales)
# Descriptive statistics ----
# Install pastecs only when it is missing instead of on every run.
if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}
library(pastecs)

# stat.desc() summary of columns 4-13 of ozon_tv
descriptive <- stat.desc(ozon_tv[, 4:13])
round(descriptive, 2)
head(descriptive)
descriptive
str(descriptive)

# Keep a rounded copy as a plain data frame. The previous table(descriptive)
# call cross-tabulated all ten columns of summary statistics, which is not a
# meaningful table and can exceed R's limit on the number of table cells.
decriptive_table <- as.data.frame(round(descriptive, 2))
decriptive_table
# Descriptive plots ----
# Install ggpubr only when it is missing instead of on every run.
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
library(ggpubr)

# Box plots of the main-product variables
# Ratings
ggboxplot(ozon_tv, y = "Rating_main", width = 0.5)
# Reviews
ggboxplot(ozon_tv, y = "Reviews_main", width = 0.5)
# Discount
ggboxplot(ozon_tv, y = "Discount_main", width = 0.5)
# Price
ggboxplot(ozon_tv, y = "Price_main", width = 0.5)
# Sales
ggboxplot(ozon_tv, y = "Sales_main", width = 0.5)

# Histograms (9 bins, with the mean marked)
gghistogram(ozon_tv, x = "Sales_main", bins = 9,
            add = "mean")
gghistogram(ozon_tv, x = "Rating_main", bins = 9,
            add = "mean")
gghistogram(ozon_tv, x = "Discount_main", bins = 9,
            add = "mean")

# Q-Q plots
ggqqplot(ozon_tv, x = "Reviews_main")
ggqqplot(ozon_tv, x = "Rating_main")
ggqqplot(ozon_tv, x = "Sales_main")

# Sales by brand
ozon_brands <- read.csv("Ozon_brands.csv", header = TRUE, sep = ",")
ggboxplot(ozon_brands, x = "Brand", y = "Sales_main",
          color = "Brand")
ggstripchart(ozon_brands, x = "Brand", y = "Sales_main",
             color = "Brand",
             add = "mean_sd")

# Mean sales per brand with standard-error bars. The previous call used the
# data set and column names of a documentation example (`df`, "Brands",
# "Sales", "Eye"), none of which exist in this script, and a fixed
# four-colour palette.
ggbarplot(ozon_brands, x = "Brand", y = "Sales_main",
          color = "Brand",
          add = "mean_se")
# Fitting the model ----
# Regressions on the data sets prepared without outliers. Each model uses
# every other column as predictor except ID, Brand, Product.Name and
# Co_purchase.

# Without outliers, first attempt
ozon_outliers <- read.csv("Ozon_outliers.csv", header = TRUE, sep = ",")

LM_sales_outliers <- lm(
  formula = Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_sales_outliers)

LM_rating_outliers <- lm(
  formula = Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_rating_outliers)

LM_reviews_outliers <- lm(
  formula = Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers
)
summary(LM_reviews_outliers)

# Without outliers, second attempt
ozon_outliers2 <- read.csv("Ozon_outliers2.csv", header = TRUE, sep = ",")

LM_sales_outliers2 <- lm(
  formula = Sales_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_sales_outliers2)

LM_rating_outliers2 <- lm(
  formula = Rating_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_rating_outliers2)

LM_reviews_outliers2 <- lm(
  formula = Reviews_main ~ . - ID - Brand - Product.Name - Co_purchase,
  data = ozon_outliers2
)
summary(LM_reviews_outliers2)
Размещено на Allbest.ru
Подобные документы
Origins of and reasons for product placement: history of product placement in the cinema, sponsored shows. Factors that can influence the cost of a placement. Branded entertainment in all its forms: series and television programs, novels and plays.
курсовая работа [42,1 K], добавлен 16.10.2013
Improving the business processes of customer relationship management through automation. Solutions the problem of the absence of automation of customer related business processes. Develop templates to support ongoing processes of customer relationships.
реферат [173,6 K], добавлен 14.02.2016
Оргтехника как основа для работы офиса, ее типы и функциональные особенности, значение. Необходимость использования компьютера, ее обоснование. Информационные системы в управлении и принципы их формирования. Модели продаж CRM-систем On-demand (или SaaS).
курсовая работа [1,6 M], добавлен 01.04.2012
Evaluation of urban public transport system in Indonesia, the possibility of its effective development. Analysis of influence factors by using the Ishikawa Cause and Effect diagram and also the use of Pareto analysis. Using business process reengineering.
контрольная работа [398,2 K], добавлен 21.04.2014
The impact of management and leadership styles on strategic decisions. Creating a leadership strategy that supports organizational direction. Appropriate methods to review current leadership requirements. Plan for the development of future situations.
курсовая работа [36,2 K], добавлен 20.05.2015
Selected aspects of stimulation of scientific thinking. Meta-skills. Methods of critical and creative thinking. Analysis of the decision-making methods without use of numerical values of probability (exemplificative of the investment projects).
аттестационная работа [196,7 K], добавлен 15.10.2008
Рассмотрение концепции Customer Relationship Management по управлению взаимоотношениями с клиентами. Возможности CRM-систем, их влияние на эффективность бизнеса. Разработка, реализация и стоимость проекта внедрения CRM-системы для ЗАО "Сибтехнология".
дипломная работа [5,5 M], добавлен 15.09.2012
Critical literature review. Apparel industry overview: Porter’s Five Forces framework, PESTLE, competitors analysis, key success factors of the industry. Bershka’s business model. Integration-responsiveness framework. Critical evaluation of chosen issue.
контрольная работа [29,1 K], добавлен 04.10.2014
Impact of globalization on the way organizations conduct their businesses overseas, in the light of increased outsourcing. The strategies adopted by General Electric. Offshore Outsourcing Business Models. Factors for affect the success of the outsourcing.
реферат [32,3 K], добавлен 13.10.2011
Major factors of success of managers. Effective achievement of the organizational purposes. Use of "emotional investigation". Providing support to employees. That is appeal charisma. Positive morale and recognition. Feedback of the head with workers.
презентация [1,8 M], добавлен 15.07.2012