Factors of successful protection from pressure on business

Concept and economic essence of property rights. Justification and development of a business protection model against possible damage to business activities caused by the influence of various external and internal market factors and economic conditions.

Рубрика Экономико-математическое моделирование
Вид дипломная работа
Язык английский
Дата добавления 11.08.2020
Размер файла 5,0 M

Отправить свою хорошую работу в базу знаний просто. Используйте форму, расположенную ниже

Студенты, аспиранты, молодые ученые, использующие базу знаний в своей учебе и работе, будут вам очень благодарны.

## is_working_dums$is_working
## is_working_dums$largest_fed_districtsMoscow 0 1
## 0 201 178
## 1 54 49

# Chi-squared test on the cross-tab of the Moscow dummy and is_working;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ is_working_dums$largest_fed_districtsMoscow + is_working_dums$is_working)
)
tst$observed

## is_working_dums$is_working
## is_working_dums$largest_fed_districtsMoscow 0 1
## 0 201 178
## 1 54 49

tst$expected

## is_working_dums$is_working
## is_working_dums$largest_fed_districtsMoscow 0 1
## 0 200.5083 178.4917
## 1 54.4917 48.5083

And repeat the same steps for each target variable.

Target: target_light_clear

# check for distributions
# Full candidate column list for target_light_clear (target column last).
light_clear_vars <- c(
  "federal_districts", "largest_fed_districts",
  "macro_okved_code", "macro_okved_code_group",
  "spark_web_site", "spark_stock_ticket",
  "category_by_size_missing", "category_by_size_melse",
  "category_by_size_2_cat",
  "administrative_position", "administrative_connections",
  "in_political_party", "in_association_or_sro",
  "case_publications", "criminal_prosecution",
  "capture", "corruption", "barriers",
  "have_court_case", "is_guilty", "reviewed_by_bac",
  "max_bac_stage", "supported_by_bac_public_council",
  "reaction_not_passed_by_applicant", "reaction_consultation",
  "reaction_target_letters_control", "to_ombudsman",
  "reaction_not_passed_by_bac",
  "auth_capital_group", "cop_stage",
  "target_light_clear"
)

# Select those columns and, in the same step, exclude rows where the
# target variable is missing.
light_clear_cs <- dataset[!is.na(dataset$target_light_clear), light_clear_vars]


# this is a very long output, it just returns xtabs for each variable and a target
#for (i in seq_along(light_clear_vars)){
# print(xtabs(~light_clear_cs$target_light_clear + light_clear_cs[,i]))
#}

# variables to worry about: federal_districts, macro_okved_code, max_bac_stage

# Reduced column list for target_light_clear: federal_districts and
# macro_okved_code are left out (kept below as commented-out entries).
light_clear_vars <- c(
  # "federal_districts",
  "largest_fed_districts",
  # "macro_okved_code",
  "macro_okved_code_group",
  "spark_web_site", "spark_stock_ticket",
  "category_by_size_missing", "category_by_size_melse",
  "category_by_size_2_cat",
  "administrative_position", "administrative_connections",
  "in_political_party", "in_association_or_sro",
  "case_publications", "criminal_prosecution",
  "capture", "corruption", "barriers",
  "have_court_case", "is_guilty", "reviewed_by_bac",
  "max_bac_stage", "supported_by_bac_public_council",
  "reaction_not_passed_by_applicant", "reaction_consultation",
  "reaction_target_letters_control", "to_ombudsman",
  "reaction_not_passed_by_bac",
  "auth_capital_group", "cop_stage",
  "target_light_clear"
)

# Select those columns and, in the same step, exclude rows where the
# target variable is missing.
light_clear_cs <- dataset[!is.na(dataset$target_light_clear), light_clear_vars]

# Chi-squared screening of every column in light_clear_cs against
# target_light_clear.
# chisqmatrix_stat() / chisqmatrix_pval() are defined elsewhere; presumably
# each returns a variable-by-variable matrix -- TODO confirm. Only the
# "target_light_clear" column of each result is kept.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
light_clear_cs_mat_stat <- chisqmatrix_stat(light_clear_cs)
light_clear_cs_mat_stat <- format(
  data.frame(light_clear_cs_mat_stat)["target_light_clear"],
  scientific = FALSE
)
light_clear_cs_mat_pval <- chisqmatrix_pval(light_clear_cs)
light_clear_cs_mat_pval <- format(
  data.frame(light_clear_cs_mat_pval)["target_light_clear"],
  scientific = FALSE
)

# Put the formatted statistic and p-value side by side, one row per variable.
light_clear_cs_df <- data.frame(c(light_clear_cs_mat_stat, light_clear_cs_mat_pval))
rownames(light_clear_cs_df) <- rownames(light_clear_cs_mat_stat)
colnames(light_clear_cs_df) <- c("Statistic", "P-value")
# write_xlsx(data.frame(light_clear_cs_df), 'light_clear_cs.xlsx')
light_clear_cs_df[2]

## P-value
## largest_fed_districts 0.5967922982
## macro_okved_code_group 0.0205476303
## spark_web_site 0.0004345725
## spark_stock_ticket 1.0000000000
## category_by_size_missing 0.0719540353
## category_by_size_melse 0.0175229116
## category_by_size_2_cat 0.2189897300
## administrative_position 0.2052253478
## administrative_connections 0.9941809904
## in_political_party 0.7465046958
## in_association_or_sro 0.0153927012
## case_publications 0.1401052728
## criminal_prosecution 0.6956983834
## capture 0.0035594639
## corruption 1.0000000000
## barriers 0.4254418022
## have_court_case 0.6749204382
## is_guilty 1.0000000000
## reviewed_by_bac 0.6323852679
## max_bac_stage 0.6140938849
## supported_by_bac_public_council 0.1167636343
## reaction_not_passed_by_applicant 0.5418697051
## reaction_consultation 0.4917820492
## reaction_target_letters_control 0.7383852546
## to_ombudsman 0.2213205774
## reaction_not_passed_by_bac 1.0000000000
## auth_capital_group 0.8882452180
## cop_stage 0.2112345131
## target_light_clear NA

#check - looks that code worked ok
chisq.test(xtabs(~dataset$target_light_clear +dataset$macro_okved_code_group))

##
## Pearson's Chi-squared test
##
## data: xtabs(~dataset$target_light_clear + dataset$macro_okved_code_group)
## X-squared = 18.092, df = 8, p-value = 0.02055

# Chi-squared test on the cross-tab of target_light_clear and is_guilty;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ dataset$target_light_clear + dataset$is_guilty)
)
tst$observed

## dataset$is_guilty
## dataset$target_light_clear 0 1
## 0 114 70
## 1 110 66

tst$expected

## dataset$is_guilty
## dataset$target_light_clear 0 1
## 0 114.4889 69.51111
## 1 109.5111 66.48889

It is time to check for categorical data with several levels.

# Multi-level categorical columns to expand into one dummy column per level.
light_clear_dummies <- light_clear_cs[c(
  # "federal_districts",
  "largest_fed_districts",
  "macro_okved_code_group",
  "max_bac_stage",
  "cop_stage",
  "category_by_size_missing",
  "category_by_size_melse",
  "category_by_size_2_cat"
)]

# max_bac_stage is converted to a factor before the dummy expansion.
light_clear_dummies[["max_bac_stage"]] <- as.factor(light_clear_dummies[["max_bac_stage"]])
dums <- dummyVars(" ~ .", data = light_clear_dummies)
light_clear_dums <- data.frame(predict(dums, newdata = light_clear_dummies))
# Re-attach the target so each dummy can be tested against it.
light_clear_dums[["target_light_clear"]] <- light_clear_cs[["target_light_clear"]]

# Chi-squared screening of every dummy column against target_light_clear.
# chisqmatrix_pval() / chisqmatrix_stat() are defined elsewhere; presumably
# each returns a variable-by-variable matrix -- TODO confirm. Only the
# "target_light_clear" column of each result is kept.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
light_clear_dums_pval <- chisqmatrix_pval(light_clear_dums)
light_clear_dums_pval <- format(
  data.frame(light_clear_dums_pval)["target_light_clear"],
  scientific = FALSE
)
light_clear_dums_stat <- chisqmatrix_stat(light_clear_dums)
light_clear_dums_stat <- format(
  data.frame(light_clear_dums_stat)["target_light_clear"],
  scientific = FALSE
)

# Put the formatted statistic and p-value side by side, one row per dummy.
light_clear_dums_df <- data.frame(c(light_clear_dums_stat, light_clear_dums_pval))
rownames(light_clear_dums_df) <- rownames(light_clear_dums_stat)
colnames(light_clear_dums_df) <- c("Statistic", "P-value")
# write_xlsx(data.frame(light_clear_dums_df), 'light_clear_cs_dums.xlsx')
light_clear_dums_df[2]

## P-value
## largest_fed_districtsCentral 1.00000000
## largest_fed_districtsFar_Siberia 1.00000000
## largest_fed_districtsMoscow 0.57377098
## largest_fed_districtsMoscow_region 0.16044629
## largest_fed_districtsNorth_West 0.22704050
## largest_fed_districtsSouth 0.74947410
## largest_fed_districtsUrals 0.55454707
## largest_fed_districtsVolga 1.00000000
## macro_okved_code_groupBuilding 0.14137979
## macro_okved_code_groupFinancial_insurance 0.07398948
## macro_okved_code_groupmanufacturing 0.51397087
## macro_okved_code_groupother_categories 0.36000192
## macro_okved_code_groupreal_estate 0.04457033
## macro_okved_code_grouprural 0.73488154
## macro_okved_code_groupScience 0.03005041
## macro_okved_code_groupTrading 0.45092390
## macro_okved_code_groupTransportation 0.86905732
## max_bac_stage.0 0.67359688
## max_bac_stage.1 0.74948683
## max_bac_stage.2 0.90441209
## max_bac_stage.3 0.73956188
## max_bac_stage.4 0.05609622
## max_bac_stage.5 0.57901134
## max_bac_stage.6 0.46403398
## cop_stageCouncil_discussion 0.27831740
## cop_stageInformation_collection 0.45373043
## cop_stageResolution 0.09740124
## category_by_size_missingBig 0.84373157
## category_by_size_missingMedium 0.18403864
## category_by_size_missingMicro 0.01752291
## category_by_size_missingSmall 0.11067617
## category_by_size_melseElse 0.01752291
## category_by_size_melseMicro 0.01752291
## category_by_size_2_catBig 0.21898973
## category_by_size_2_catSmall 0.21898973
## target_light_clear NA

# check if code worked ok
# check the high p-values to see whether there are errors in the code or not
# looks like ok the values are pretty close to expected
xtabs(~light_clear_dums$macro_okved_code_groupScience +light_clear_dums$target_light_clear)

## light_clear_dums$target_light_clear
## light_clear_dums$macro_okved_code_groupScience 0 1
## 0 174 154
## 1 10 22

# Chi-squared test on the cross-tab of the Science dummy and the target;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ light_clear_dums$macro_okved_code_groupScience + light_clear_dums$target_light_clear)
)
tst$observed

## light_clear_dums$target_light_clear
## light_clear_dums$macro_okved_code_groupScience 0 1
## 0 174 154
## 1 10 22

tst$expected

## light_clear_dums$target_light_clear
## light_clear_dums$macro_okved_code_groupScience 0 1
## 0 167.64444 160.35556
## 1 16.35556 15.64444

xtabs(~light_clear_dums$largest_fed_districtsMoscow +light_clear_dums$target_light_clear)

## light_clear_dums$target_light_clear
## light_clear_dums$largest_fed_districtsMoscow 0 1
## 0 143 142
## 1 41 34

# Chi-squared test on the cross-tab of the Moscow dummy and the target;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ light_clear_dums$largest_fed_districtsMoscow + light_clear_dums$target_light_clear)
)
tst$observed

## light_clear_dums$target_light_clear
## light_clear_dums$largest_fed_districtsMoscow 0 1
## 0 143 142
## 1 41 34

tst$expected

## light_clear_dums$target_light_clear
## light_clear_dums$largest_fed_districtsMoscow 0 1
## 0 145.66667 139.33333
## 1 38.33333 36.66667

Target: target_light_extended

# check for distributions
# Candidate column list for target_light_extended (target column last);
# federal_districts and macro_okved_code are kept only as commented-out entries.
light_extended_vars <- c(
  # "federal_districts",
  "largest_fed_districts",
  # "macro_okved_code",
  "macro_okved_code_group",
  "spark_web_site", "spark_stock_ticket",
  "category_by_size_missing", "category_by_size_melse",
  "category_by_size_2_cat",
  "administrative_position", "administrative_connections",
  "in_political_party", "in_association_or_sro",
  "case_publications", "criminal_prosecution",
  "capture", "corruption", "barriers",
  "have_court_case", "is_guilty", "reviewed_by_bac",
  "max_bac_stage", "supported_by_bac_public_council",
  "reaction_not_passed_by_applicant", "reaction_consultation",
  "reaction_target_letters_control", "to_ombudsman",
  "reaction_not_passed_by_bac",
  "auth_capital_group", "cop_stage",
  "target_light_extended"
)

# Select those columns and, in the same step, exclude rows where the
# target variable is missing.
light_extended_cs <- dataset[!is.na(dataset$target_light_extended), light_extended_vars]


# this is a very long output, it just returns xtabs for each variable and a target
#for (i in seq_along(light_extended_vars)){
# print(xtabs(~light_extended_cs$target_light_extended + light_extended_cs[,i]))
#}

# variables to worry about: federal_districts, macro_okved_code, reaction_not_passed_by_bac

# Column list used for the target_light_extended chi-squared screening
# (target column last); federal_districts and macro_okved_code stay excluded.
light_extended_vars <- c(
  # "federal_districts",
  "largest_fed_districts",
  # "macro_okved_code",
  "macro_okved_code_group",
  "spark_web_site", "spark_stock_ticket",
  "category_by_size_missing", "category_by_size_melse",
  "category_by_size_2_cat",
  "administrative_position", "administrative_connections",
  "in_political_party", "in_association_or_sro",
  "case_publications", "criminal_prosecution",
  "capture", "corruption", "barriers",
  "have_court_case", "is_guilty", "reviewed_by_bac",
  "max_bac_stage", "supported_by_bac_public_council",
  "reaction_not_passed_by_applicant", "reaction_consultation",
  "reaction_target_letters_control", "to_ombudsman",
  "reaction_not_passed_by_bac",
  "auth_capital_group", "cop_stage",
  "target_light_extended"
)

# Select those columns and, in the same step, exclude rows where the
# target variable is missing.
light_extended_cs <- dataset[!is.na(dataset$target_light_extended), light_extended_vars]

# Chi-squared screening of every column in light_extended_cs against
# target_light_extended.
# chisqmatrix_stat() / chisqmatrix_pval() are defined elsewhere; presumably
# each returns a variable-by-variable matrix -- TODO confirm. Only the
# "target_light_extended" column of each result is kept.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
light_extended_cs_mat_stat <- chisqmatrix_stat(light_extended_cs)
light_extended_cs_mat_stat <- format(
  data.frame(light_extended_cs_mat_stat)["target_light_extended"],
  scientific = FALSE
)
light_extended_cs_mat_pval <- chisqmatrix_pval(light_extended_cs)
light_extended_cs_mat_pval <- format(
  data.frame(light_extended_cs_mat_pval)["target_light_extended"],
  scientific = FALSE
)

# Put the formatted statistic and p-value side by side, one row per variable.
light_extended_cs_df <- data.frame(c(light_extended_cs_mat_stat, light_extended_cs_mat_pval))
rownames(light_extended_cs_df) <- rownames(light_extended_cs_mat_stat)
colnames(light_extended_cs_df) <- c("Statistic", "P-value")
# write_xlsx(data.frame(light_extended_cs_df), 'light_extended_cs.xlsx')
light_extended_cs_df[2]

## P-value
## largest_fed_districts 0.40839317261
## macro_okved_code_group 0.00604937767
## spark_web_site 0.00002727188
## spark_stock_ticket 0.83651266939
## category_by_size_missing 0.01322400277
## category_by_size_melse 0.00274021416
## category_by_size_2_cat 0.21242355323
## administrative_position 0.34944717127
## administrative_connections 0.91675337150
## in_political_party 0.62304212321
## in_association_or_sro 0.00816162576
## case_publications 0.01112536082
## criminal_prosecution 0.67704214429
## capture 0.00381863178
## corruption 1.00000000000
## barriers 0.36766267753
## have_court_case 0.42885944656
## is_guilty 1.00000000000
## reviewed_by_bac 0.81884704855
## max_bac_stage 0.71531027537
## supported_by_bac_public_council 0.23763136934
## reaction_not_passed_by_applicant 0.53254210174
## reaction_consultation 0.68440521975
## reaction_target_letters_control 0.91096615924
## to_ombudsman 0.22184866378
## reaction_not_passed_by_bac 1.00000000000
## auth_capital_group 0.92089894328
## cop_stage 0.25867496221
## target_light_extended NA

#check - looks that code worked ok
chisq.test(xtabs(~dataset$target_light_extended +dataset$macro_okved_code_group))

##
## Pearson's Chi-squared test
##
## data: xtabs(~dataset$target_light_extended + dataset$macro_okved_code_group)
## X-squared = 21.447, df = 8, p-value = 0.006049

# Chi-squared test on the cross-tab of target_light_extended and is_guilty;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ dataset$target_light_extended + dataset$is_guilty)
)
tst$observed

## dataset$is_guilty
## dataset$target_light_extended 0 1
## 0 114 70
## 1 128 80

tst$expected

## dataset$is_guilty
## dataset$target_light_extended 0 1
## 0 113.5918 70.40816
## 1 128.4082 79.59184

It is time to check for categorical data with several levels.

# Multi-level categorical columns to expand into one dummy column per level.
light_extended_dummies <- light_extended_cs[c(
  # "federal_districts",
  "largest_fed_districts",
  # "macro_okved_code",
  "macro_okved_code_group",
  "max_bac_stage",
  "cop_stage",
  "category_by_size_missing",
  "category_by_size_melse",
  "category_by_size_2_cat"
)]

# max_bac_stage is converted to a factor before the dummy expansion.
light_extended_dummies[["max_bac_stage"]] <- as.factor(light_extended_dummies[["max_bac_stage"]])
dums <- dummyVars(" ~ .", data = light_extended_dummies)
light_extended_dums <- data.frame(predict(dums, newdata = light_extended_dummies))
# Re-attach the target so each dummy can be tested against it.
light_extended_dums[["target_light_extended"]] <- light_extended_cs[["target_light_extended"]]

# Chi-squared screening of every dummy column against target_light_extended.
# chisqmatrix_pval() / chisqmatrix_stat() are defined elsewhere; presumably
# each returns a variable-by-variable matrix -- TODO confirm. Only the
# "target_light_extended" column of each result is kept.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
light_extended_dums_pval <- chisqmatrix_pval(light_extended_dums)
light_extended_dums_pval <- format(
  data.frame(light_extended_dums_pval)["target_light_extended"],
  scientific = FALSE
)
light_extended_dums_stat <- chisqmatrix_stat(light_extended_dums)
light_extended_dums_stat <- format(
  data.frame(light_extended_dums_stat)["target_light_extended"],
  scientific = FALSE
)

# Put the formatted statistic and p-value side by side, one row per dummy.
light_extended_dums_df <- data.frame(c(light_extended_dums_stat, light_extended_dums_pval))
rownames(light_extended_dums_df) <- rownames(light_extended_dums_stat)
colnames(light_extended_dums_df) <- c("Statistic", "P-value")
# write_xlsx(data.frame(light_extended_dums_df), 'light_extended_dums.xlsx')
light_extended_dums_df[2]

## P-value
## largest_fed_districtsCentral 0.978140031
## largest_fed_districtsFar_Siberia 0.992589979
## largest_fed_districtsMoscow 0.458983283
## largest_fed_districtsMoscow_region 0.095345164
## largest_fed_districtsNorth_West 0.198391788
## largest_fed_districtsSouth 0.768685128
## largest_fed_districtsUrals 0.385254299
## largest_fed_districtsVolga 1.000000000
## macro_okved_code_groupBuilding 0.135740588
## macro_okved_code_groupFinancial_insurance 0.030840554
## macro_okved_code_groupmanufacturing 0.672489344
## macro_okved_code_groupother_categories 0.605533043
## macro_okved_code_groupreal_estate 0.017445468
## macro_okved_code_grouprural 0.541873299
## macro_okved_code_groupScience 0.024955324
## macro_okved_code_groupTrading 0.404307936
## macro_okved_code_groupTransportation 0.612668697
## max_bac_stage.0 0.624449893
## max_bac_stage.1 0.574128222
## max_bac_stage.2 0.965381635
## max_bac_stage.3 0.872470089
## max_bac_stage.4 0.089875421
## max_bac_stage.5 0.716274639
## max_bac_stage.6 0.569288459
## cop_stageCouncil_discussion 0.407972694
## cop_stageInformation_collection 0.375015378
## cop_stageResolution 0.126521044
## category_by_size_missingBig 0.857503102
## category_by_size_missingMedium 0.169497797
## category_by_size_missingMicro 0.002740214
## category_by_size_missingSmall 0.021530300
## category_by_size_melseElse 0.002740214
## category_by_size_melseMicro 0.002740214
## category_by_size_2_catBig 0.212423553
## category_by_size_2_catSmall 0.212423553
## target_light_extended NA

# check if code worked ok
# check the high p-values to see whether there are errors in the code or not
# looks like ok the values are pretty close to expected
xtabs(~light_extended_dums$macro_okved_code_groupScience +light_extended_dums$target_light_extended)

## light_extended_dums$target_light_extended
## light_extended_dums$macro_okved_code_groupScience 0 1
## 0 174 182
## 1 10 26

# Chi-squared test on the cross-tab of the Science dummy and the target;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ light_extended_dums$macro_okved_code_groupScience + light_extended_dums$target_light_extended)
)
tst$observed

## light_extended_dums$target_light_extended
## light_extended_dums$macro_okved_code_groupScience 0 1
## 0 174 182
## 1 10 26

tst$expected

## light_extended_dums$target_light_extended
## light_extended_dums$macro_okved_code_groupScience 0 1
## 0 167.10204 188.89796
## 1 16.89796 19.10204

xtabs(~light_extended_dums$largest_fed_districtsMoscow +light_extended_dums$target_light_extended)

## light_extended_dums$target_light_extended
## light_extended_dums$largest_fed_districtsMoscow 0 1
## 0 143 169
## 1 41 39

# Chi-squared test on the cross-tab of the Moscow dummy and the target;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ light_extended_dums$largest_fed_districtsMoscow + light_extended_dums$target_light_extended)
)
tst$observed

## light_extended_dums$target_light_extended
## light_extended_dums$largest_fed_districtsMoscow 0 1
## 0 143 169
## 1 41 39

tst$expected

## light_extended_dums$target_light_extended
## light_extended_dums$largest_fed_districtsMoscow 0 1
## 0 146.44898 165.55102
## 1 37.55102 42.44898

Target: target_strong_extended

# Candidate column list for target_strong_extended (target column last);
# federal_districts and macro_okved_code are kept only as commented-out entries.
strong_extended_vars <- c(
  # "federal_districts",
  "largest_fed_districts",
  # "macro_okved_code",
  "macro_okved_code_group",
  "spark_web_site", "spark_stock_ticket",
  "category_by_size_missing", "category_by_size_melse",
  "category_by_size_2_cat",
  "administrative_position", "administrative_connections",
  "in_political_party", "in_association_or_sro",
  "case_publications", "criminal_prosecution",
  "capture", "corruption", "barriers",
  "have_court_case", "is_guilty", "reviewed_by_bac",
  "max_bac_stage", "supported_by_bac_public_council",
  "reaction_not_passed_by_applicant", "reaction_consultation",
  "reaction_target_letters_control", "to_ombudsman",
  "reaction_not_passed_by_bac",
  "auth_capital_group", "cop_stage",
  "target_strong_extended"
)

# Select those columns and, in the same step, exclude rows where the
# target variable is missing.
strong_extended_cs <- dataset[!is.na(dataset$target_strong_extended), strong_extended_vars]

# this is a very long output, it just returns xtabs for each variable and a target
# for (i in 1:length(strong_extended_vars)){
# print(xtabs(~strong_extended_cs$target_strong_extended + strong_extended_cs[,i]))
# }

# variables to worry about: macro_okved_code_group (some low categories), auth_capital_group

# Column list used for the target_strong_extended chi-squared screening
# (target column last); federal_districts and macro_okved_code stay excluded.
strong_extended_vars <- c(
  # "federal_districts",
  "largest_fed_districts",
  # "macro_okved_code",
  "macro_okved_code_group",
  "spark_web_site", "spark_stock_ticket",
  "category_by_size_missing", "category_by_size_melse",
  "category_by_size_2_cat",
  "administrative_position", "administrative_connections",
  "in_political_party", "in_association_or_sro",
  "case_publications", "criminal_prosecution",
  "capture", "corruption", "barriers",
  "have_court_case", "is_guilty", "reviewed_by_bac",
  "max_bac_stage", "supported_by_bac_public_council",
  "reaction_not_passed_by_applicant", "reaction_consultation",
  "reaction_target_letters_control", "to_ombudsman",
  "reaction_not_passed_by_bac",
  "auth_capital_group", "cop_stage",
  "target_strong_extended"
)

# Select those columns and, in the same step, exclude rows where the
# target variable is missing.
strong_extended_cs <- dataset[!is.na(dataset$target_strong_extended), strong_extended_vars]

# Chi-squared screening of every column in strong_extended_cs against
# target_strong_extended.
# chisqmatrix_stat() / chisqmatrix_pval() are defined elsewhere; presumably
# each returns a variable-by-variable matrix -- TODO confirm. Only the
# "target_strong_extended" column of each result is kept.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
strong_extended_cs_mat_stat <- chisqmatrix_stat(strong_extended_cs)
strong_extended_cs_mat_stat <- format(
  data.frame(strong_extended_cs_mat_stat)["target_strong_extended"],
  scientific = FALSE
)
# NOTE(review): this name breaks the `strong_extended_cs_mat_*` pattern used
# for the statistic; it is kept unchanged in case later code refers to it.
target_strong_cs_mat_pval <- chisqmatrix_pval(strong_extended_cs)
target_strong_cs_mat_pval <- format(
  data.frame(target_strong_cs_mat_pval)["target_strong_extended"],
  scientific = FALSE
)

# Put the formatted statistic and p-value side by side, one row per variable.
strong_extended_cs_df <- data.frame(c(strong_extended_cs_mat_stat, target_strong_cs_mat_pval))
rownames(strong_extended_cs_df) <- rownames(strong_extended_cs_mat_stat)
colnames(strong_extended_cs_df) <- c("Statistic", "P-value")
# write_xlsx(data.frame(strong_extended_cs_df), 'strong_extended_cs.xlsx')
strong_extended_cs_df[2]

## P-value
## largest_fed_districts 0.2150360448
## macro_okved_code_group 0.0079194198
## spark_web_site 0.0003237673
## spark_stock_ticket 1.0000000000
## category_by_size_missing 0.1438133617
## category_by_size_melse 0.2263052628
## category_by_size_2_cat 0.0309375079
## administrative_position 0.6204024134
## administrative_connections 0.8772617872
## in_political_party 0.7271179854
## in_association_or_sro 0.0178374213
## case_publications 0.0440158847
## criminal_prosecution 0.2841037763
## capture 0.0561751806
## corruption 1.0000000000
## barriers 0.0345784864
## have_court_case 1.0000000000
## is_guilty 0.9369415164
## reviewed_by_bac 1.0000000000
## max_bac_stage 0.6420572726
## supported_by_bac_public_council 1.0000000000
## reaction_not_passed_by_applicant 0.2078206642
## reaction_consultation 0.0328155060
## reaction_target_letters_control 0.5880917327
## to_ombudsman 0.9275712728
## reaction_not_passed_by_bac 1.0000000000
## auth_capital_group 0.5121089485
## cop_stage 0.9825437801
## target_strong_extended NA

It looks like there are problems with the distribution here, so the analysis should stick to the dummy variables.

#check - looks that code worked ok
chisq.test(xtabs(~dataset$target_strong_extended +dataset$macro_okved_code_group))

## Warning in chisq.test(xtabs(~dataset$target_strong_extended +
## dataset$macro_okved_code_group)): Chi-squared approximation may be
## incorrect

##
## Pearson's Chi-squared test
##
## data: xtabs(~dataset$target_strong_extended + dataset$macro_okved_code_group)
## X-squared = 20.723, df = 8, p-value = 0.007919

# Chi-squared test on the cross-tab of target_strong_extended and is_guilty;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ dataset$target_strong_extended + dataset$is_guilty)
)
tst$observed

## dataset$is_guilty
## dataset$target_strong_extended 0 1
## 0 185 116
## 1 57 34

tst$expected

## dataset$is_guilty
## dataset$target_strong_extended 0 1
## 0 185.82143 115.17857
## 1 56.17857 34.82143

It is time to check for categorical data with several levels.

# Multi-level categorical columns to expand into one dummy column per level.
strong_extended_dummies <- strong_extended_cs[c(
  # "federal_districts",
  "largest_fed_districts",
  # "macro_okved_code",
  "macro_okved_code_group",
  "max_bac_stage",
  "cop_stage",
  "category_by_size_missing",
  "category_by_size_melse",
  "category_by_size_2_cat"
)]

# max_bac_stage is converted to a factor before the dummy expansion.
strong_extended_dummies[["max_bac_stage"]] <- as.factor(strong_extended_dummies[["max_bac_stage"]])
dums <- dummyVars(" ~ .", data = strong_extended_dummies)
strong_extended_dums <- data.frame(predict(dums, newdata = strong_extended_dummies))
# Re-attach the target so each dummy can be tested against it.
strong_extended_dums[["target_strong_extended"]] <- strong_extended_cs[["target_strong_extended"]]

# Chi-squared screening of every dummy column against target_strong_extended.
# chisqmatrix_pval() / chisqmatrix_stat() are defined elsewhere; presumably
# each returns a variable-by-variable matrix -- TODO confirm. Only the
# "target_strong_extended" column of each result is kept.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
strong_extended_dums_pval <- chisqmatrix_pval(strong_extended_dums)
strong_extended_dums_pval <- format(
  data.frame(strong_extended_dums_pval)["target_strong_extended"],
  scientific = FALSE
)
strong_extended_dums_stat <- chisqmatrix_stat(strong_extended_dums)
strong_extended_dums_stat <- format(
  data.frame(strong_extended_dums_stat)["target_strong_extended"],
  scientific = FALSE
)

# Put the formatted statistic and p-value side by side, one row per dummy.
strong_extended_dums_df <- data.frame(c(strong_extended_dums_stat, strong_extended_dums_pval))
rownames(strong_extended_dums_df) <- rownames(strong_extended_dums_stat)
colnames(strong_extended_dums_df) <- c("Statistic", "P-value")
# write_xlsx(data.frame(strong_extended_dums_df), 'strong_extended_cs_dums.xlsx')
strong_extended_dums_df[2]

## P-value
## largest_fed_districtsCentral 0.879857641
## largest_fed_districtsFar_Siberia 0.449858657
## largest_fed_districtsMoscow 0.567016227
## largest_fed_districtsMoscow_region 0.013451367
## largest_fed_districtsNorth_West 0.684956709
## largest_fed_districtsSouth 0.449858657
## largest_fed_districtsUrals 0.938092359
## largest_fed_districtsVolga 0.700121434
## macro_okved_code_groupBuilding 0.470209700
## macro_okved_code_groupFinancial_insurance 0.611935138
## macro_okved_code_groupmanufacturing 0.240605743
## macro_okved_code_groupother_categories 0.231483398
## macro_okved_code_groupreal_estate 0.109560809
## macro_okved_code_grouprural 0.842081008
## macro_okved_code_groupScience 0.003088035
## macro_okved_code_groupTrading 0.216097020
## macro_okved_code_groupTransportation 0.180646635
## max_bac_stage.0 0.259830627
## max_bac_stage.1 0.399437873
## max_bac_stage.2 1.000000000
## max_bac_stage.3 0.739848567
## max_bac_stage.4 0.797639824
## max_bac_stage.5 0.752324424
## max_bac_stage.6 0.707455436
## cop_stageCouncil_discussion 0.958172266
## cop_stageInformation_collection 1.000000000
## cop_stageResolution 1.000000000
## category_by_size_missingBig 0.180551008
## category_by_size_missingMedium 0.176442012
## category_by_size_missingMicro 0.226305263
## category_by_size_missingSmall 0.682311757
## category_by_size_melseElse 0.226305263
## category_by_size_melseMicro 0.226305263
## category_by_size_2_catBig 0.030937508
## category_by_size_2_catSmall 0.030937508
## target_strong_extended NA

# check if code worked ok
# check the high p-values to see whether there are errors in the code or not
# looks like ok the values are pretty close to expected
xtabs(~strong_extended_dums$macro_okved_code_groupScience +strong_extended_dums$target_strong_extended)

## strong_extended_dums$target_strong_extended
## strong_extended_dums$macro_okved_code_groupScience 0 1
## 0 281 75
## 1 20 16

# Chi-squared test on the cross-tab of the Science dummy and the target;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ strong_extended_dums$macro_okved_code_groupScience + strong_extended_dums$target_strong_extended)
)
tst$observed

## strong_extended_dums$target_strong_extended
## strong_extended_dums$macro_okved_code_groupScience 0 1
## 0 281 75
## 1 20 16

tst$expected

## strong_extended_dums$target_strong_extended
## strong_extended_dums$macro_okved_code_groupScience 0 1
## 0 273.35714 82.642857
## 1 27.64286 8.357143

xtabs(~strong_extended_dums$macro_okved_code_groupTransportation +strong_extended_dums$target_strong_extended)

## strong_extended_dums$target_strong_extended
## strong_extended_dums$macro_okved_code_groupTransportation 0 1
## 0 286 90
## 1 15 1

# Chi-squared test on the cross-tab of the Transportation dummy and the target;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ strong_extended_dums$macro_okved_code_groupTransportation + strong_extended_dums$target_strong_extended)
)

## Warning in
## chisq.test(xtabs(~strong_extended_dums$macro_okved_code_groupTransportation
## + : Chi-squared approximation may be incorrect

tst$observed

## strong_extended_dums$target_strong_extended
## strong_extended_dums$macro_okved_code_groupTransportation 0 1
## 0 286 90
## 1 15 1

tst$expected

## strong_extended_dums$target_strong_extended
## strong_extended_dums$macro_okved_code_groupTransportation 0
## 0 288.71429
## 1 12.28571
## strong_extended_dums$target_strong_extended
## strong_extended_dums$macro_okved_code_groupTransportation 1
## 0 87.285714
## 1 3.714286

xtabs(~strong_extended_dums$largest_fed_districtsMoscow +strong_extended_dums$target_strong_extended)

## strong_extended_dums$target_strong_extended
## strong_extended_dums$largest_fed_districtsMoscow 0 1
## 0 242 70
## 1 59 21

# Chi-squared test on the cross-tab of the Moscow dummy and the target;
# the result is kept in `tst` so observed/expected counts can be inspected.
tst <- chisq.test(
  xtabs(~ strong_extended_dums$largest_fed_districtsMoscow + strong_extended_dums$target_strong_extended)
)
tst$observed

## strong_extended_dums$target_strong_extended
## strong_extended_dums$largest_fed_districtsMoscow 0 1
## 0 242 70
## 1 59 21

tst$expected

## strong_extended_dums$target_strong_extended
## strong_extended_dums$largest_fed_districtsMoscow 0 1
## 0 239.57143 72.42857
## 1 61.42857 18.57143

Part 3. Modelling

TARGET 2 - TARGET LIGHT CLEAR Baseline

# Columns fed to the baseline model for target_light_clear (target last).
# Entries that are commented out are deliberately left out of this model.
light_clear_vars <- c(
  "macro_okved_code_group",
  # "spark_web_site",
  "spark_stock_ticket",
  "category_by_size_missing",
  # "category_by_size_melse",
  # "category_by_size_2_cat",
  "administrative_position", "administrative_connections",
  "in_political_party", "in_association_or_sro",
  "case_publications", "criminal_prosecution",
  "capture", "corruption", "barriers",
  "have_court_case", "is_guilty",
  # "reviewed_by_bac",
  # "supported_by_bac_public_council",
  # "max_bac_stage",
  "cop_stage",
  # "reaction_not_passed_by_applicant",
  # "reaction_consultation",
  # "reaction_target_letters_control",
  # "reaction_not_passed_by_bac",
  "to_ombudsman",
  "target_light_clear"
)

# Modelling frame: the selected columns, restricted to rows where both the
# target and category_by_size_missing are observed.
light_clear_data <- dataset[light_clear_vars]
light_clear_data <- light_clear_data[
  !is.na(light_clear_data$target_light_clear) &
    !is.na(light_clear_data$category_by_size_missing),
]

# Multi-level categorical predictors are (re)built as factors.
light_clear_data[["macro_okved_code_group"]] <- factor(light_clear_data[["macro_okved_code_group"]])
# light_clear_data$largest_fed_districts <- factor(light_clear_data$largest_fed_districts)
light_clear_data[["category_by_size_missing"]] <- factor(light_clear_data[["category_by_size_missing"]])
light_clear_data[["cop_stage"]] <- factor(light_clear_data[["cop_stage"]])


# Baseline logistic regression of the target on all remaining columns.
logit_1 <- glm(target_light_clear ~ ., family = binomial, data = light_clear_data)
summary(logit_1)

##
## Call:
## glm(formula = target_light_clear ~ ., family = binomial, data = light_clear_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9222 -1.0905 0.4571 1.0470 2.0104
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 1.24167 0.83073 1.495
## macro_okved_code_groupFinancial_insurance -0.48552 0.70260 -0.691
## macro_okved_code_groupmanufacturing 0.42557 0.46075 0.924
## macro_okved_code_groupother_categories 0.71754 0.46382 1.547
## macro_okved_code_groupreal_estate 1.67006 0.55638 3.002
## macro_okved_code_grouprural 0.84125 0.62102 1.355
## macro_okved_code_groupScience 1.51626 0.53473 2.836
## macro_okved_code_groupTrading 0.45803 0.43992 1.041
## macro_okved_code_groupTransportation 0.31485 0.64753 0.486
## spark_stock_ticket -1.24550 0.79960 -1.558
## category_by_size_missingMedium 0.20615 0.68941 0.299
## category_by_size_missingMicro -0.88162 0.52919 -1.666
## category_by_size_missingSmall -0.13956 0.57448 -0.243
## administrative_position 1.11948 0.54230 2.064
## administrative_connections -0.46918 0.32053 -1.464
## in_political_party -0.31557 0.46877 -0.673
## in_association_or_sro 0.61678 0.28510 2.163
## case_publications -0.71444 0.39023 -1.831
## criminal_prosecution -0.18693 0.44130 -0.424
## capture -0.90643 0.36114 -2.510
## corruption 0.18578 0.50033 0.371
## barriers -0.20690 0.55759 -0.371
## have_court_case 0.38369 0.37680 1.018
## is_guilty -0.18523 0.38364 -0.483
## cop_stageInformation_collection -0.09114 0.39690 -0.230
## cop_stageResolution -0.85645 0.32819 -2.610
## to_ombudsman 0.66012 0.41640 1.585
## Pr(>|z|)
## (Intercept) 0.13500
## macro_okved_code_groupFinancial_insurance 0.48955
## macro_okved_code_groupmanufacturing 0.35566
## macro_okved_code_groupother_categories 0.12186
## macro_okved_code_groupreal_estate 0.00269 **
## macro_okved_code_grouprural 0.17554
## macro_okved_code_groupScience 0.00457 **
## macro_okved_code_groupTrading 0.29780
## macro_okved_code_groupTransportation 0.62681
## spark_stock_ticket 0.11931
## category_by_size_missingMedium 0.76492
## category_by_size_missingMicro 0.09572 .
## category_by_size_missingSmall 0.80805
## administrative_position 0.03899 *
## administrative_connections 0.14326
## in_political_party 0.50082
## in_association_or_sro 0.03051 *
## case_publications 0.06713 .
## criminal_prosecution 0.67187
## capture 0.01208 *
## corruption 0.71040
## barriers 0.71060
## have_court_case 0.30854
## is_guilty 0.62923
## cop_stageInformation_collection 0.81838
## cop_stageResolution 0.00907 **
## to_ombudsman 0.11290
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 448.85 on 323 degrees of freedom
## Residual deviance: 390.96 on 297 degrees of freedom
## AIC: 444.96
##
## Number of Fisher Scoring iterations: 4


Подобные документы

  • Характеристика программной среды Business Studio 3.6. Демонстрационная база на примере покупки и доставки офисной мебели. Содержание, временная и логическая очередность операций бизнес-процесса компании "Аккорд" г. Ростов-на-Дону; области моделирования.

    курсовая работа [1,3 M], добавлен 01.06.2014

  • Процесс интеграции технических и программных средств во все аспекты деятельности предприятия. Группа контроллинга и ее задачи. Операционно-ориентированный расчет себестоимости продукта (услуги). Определение времени выполнения и стоимости процесса.

    реферат [547,5 K], добавлен 14.09.2010

  • Модель оценки долгосрочных активов (Capital Asset Pricing Model, САРМ). Оценка доходности и риска на основе исторических данных. Выбор оптимального портфеля из рискованных активов. Риск и неопределенность денежных потоков. Расчет бета-коэффициента.

    презентация [104,1 K], добавлен 30.07.2013

  • Mathematical model of the grinding grating bending process under the action of a meat product load parabolically decreasing along the radius. Determination of the deflection of a knife blade under the action of a parabolic load of the food medium.

    статья [1,3 M], добавлен 20.10.2022

  • Definition and stages of business cycles, their causes and the characteristic of kinds. Types and a continuity of business cycles. Kondratyev's wave. A role of cycles in stabilization of a policy of the state. Great depression as an economic crisis.

    реферат [130,5 K], добавлен 20.03.2011

  • Mission, aims and potential of company. Analysis of the opportunities and threats of international business. Description of the factors that characterize the business opportunities in Finland. The business plan of the penetration to market of Finland.

    курсовая работа [128,3 K], добавлен 04.06.2013

  • Business plans are an important test of clarity of thinking and clarity of the business. Reasons for writing a business plan. Market trends and the market niche for product. Business concept, market analysis. Company organization, financial plan.

    реферат [59,4 K], добавлен 15.09.2012

  • Support of business entities on the part of specialized agencies of the state on world markets. Interconnection of economic diplomacy of Ukraine in international cooperation with influence on the results of foreign economic activity of the country.

    статья [30,1 K], добавлен 19.09.2017

  • Impact of globalization on the way organizations conduct their businesses overseas, in the light of increased outsourcing. The strategies adopted by General Electric. Offshore Outsourcing Business Models. Factors for affect the success of the outsourcing.

    реферат [32,3 K], добавлен 13.10.2011

  • Technical and economic characteristics of medical institutions. Development of an automation project. Justification of the methods of calculating cost-effectiveness. General information about health and organization safety. Providing electrical safety.

    дипломная работа [3,7 M], добавлен 14.05.2014

Работы в архивах красиво оформлены согласно требованиям ВУЗов и содержат рисунки, диаграммы, формулы и т.д.
PPT, PPTX и PDF-файлы представлены только в архивах.
Рекомендуем скачать работу.