podatki <- read.table("/cloud/project/Poglavje 5/Zgled/Zadolženost.csv", header=TRUE, sep=";", dec=",")
head(podatki[5520:5522, ], 3)
## Država Panoga Podjetje Dolg Dobiček LnSredstva Oprijem
## 5520 21 7 SI1216350 17.92704 -10.91115 8.985842 21.1809237
## 5521 21 7 SI1233378 0.00000 11.26977 8.785390 0.8033013
## 5522 21 7 SI1244329 39.43763 14.88862 8.991821 31.7729822
## DD Inflacija Davek
## 5520 0 0.9 20
## 5521 0 0.9 20
## 5522 0 0.9 20
Opis spremenljivk:
podatki$PanogaF <- factor(podatki$Panoga,
levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
labels = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P","Q", "R", "S", "T", "U"))
podatki$DržavaF <- factor(podatki$Država,
levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
labels = c("B", "BG", "HR", "CZ", "EST", "FI", "FR", "D", "GR", "HU", "ISL", "IR", "I", "LAT", "LT", "LX","N", "PL", "PT", "SK", "SVN", "ES", "SEV", "CH", "GB"))
podatki$DDF <- factor(podatki$DD,
levels = c(0, 1),
labels = c("D.O.O.", "D.D."))
summary(podatki[c(-1, -2, -3, -8)])
## Dolg Dobiček LnSredstva Oprijem
## Min. : 0.00 Min. :-15.000 Min. : 7.967 Min. : 0.01
## 1st Qu.:10.94 1st Qu.: 1.922 1st Qu.: 9.091 1st Qu.:11.65
## Median :28.31 Median : 4.404 Median : 9.831 Median :30.15
## Mean :29.98 Mean : 5.403 Mean :10.274 Mean :36.16
## 3rd Qu.:45.80 3rd Qu.: 8.086 3rd Qu.:11.071 3rd Qu.:55.99
## Max. :86.18 Max. : 35.000 Max. :15.496 Max. :96.93
##
## Inflacija Davek PanogaF DržavaF
## Min. :-1.700 Min. :10.00 C :2340 GB :1691
## 1st Qu.: 0.000 1st Qu.:22.00 G :2273 D :1392
## Median : 0.600 Median :25.00 F : 714 ES : 930
## Mean : 1.154 Mean :25.31 M : 621 B : 736
## 3rd Qu.: 2.200 3rd Qu.:32.00 L : 535 PL : 476
## Max. :12.000 Max. :33.99 H : 534 CH : 387
## (Other):1760 (Other):3165
## DDF
## D.O.O.:5664
## D.D. :3113
##
##
##
##
##
fit <- glm(Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + Inflacija + Davek,
data = podatki)
summary(fit)
##
## Call:
## glm(formula = Dolg ~ Dobiček + LnSredstva + Oprijem + DDF +
## Inflacija + Davek, data = podatki)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -48.337 -16.626 -1.421 14.073 68.619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.139528 1.837409 4.974 6.68e-07 ***
## Dobiček -0.187980 0.033288 -5.647 1.68e-08 ***
## LnSredstva 0.612188 0.145329 4.212 2.55e-05 ***
## Oprijem 0.265129 0.007941 33.387 < 2e-16 ***
## DDFD.D. -1.627060 0.482975 -3.369 0.000758 ***
## Inflacija -0.279789 0.175111 -1.598 0.110128
## Davek 0.271909 0.041171 6.604 4.23e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 425.227)
##
## Null deviance: 4314111 on 8776 degrees of freedom
## Residual deviance: 3729241 on 8770 degrees of freedom
## AIC: 78041
##
## Number of Fisher Scoring iterations: 2
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:DescTools':
##
## Recode
vif(fit)
## Dobiček LnSredstva Oprijem DDF Inflacija Davek
## 1.034970 1.037910 1.043845 1.102006 1.549262 1.503731
mean(vif(fit))
## [1] 1.211954
podatki$StdOst <- rstandard(fit)
podatki$CooksD <- cooks.distance(fit)
hist(podatki$StdOst,
main = "Histogram standardiziranih ostankov",
ylab = "Frekvenca",
xlab = "Standardizirani ostanki")
podatki$StdOcene <- scale(fit$fitted.values)
library(ggplot2)
ggplot(podatki, aes(y=StdOst, x=StdOcene)) +
theme_linedraw() +
geom_point() +
xlab("Standardizirane ocenjene vrednosti") +
ylab("Standardizirani ostanki")
head(podatki[order(podatki$StdOst), c("Podjetje", "StdOst") ], 4)
## Podjetje StdOst
## 1571 FR440220689 -2.345224
## 4076 ITVI0176757 -2.339666
## 536 BE0447029250 -2.321691
## 4011 ITSA0347093 -2.315026
head(podatki[order(-podatki$StdOst), c("Podjetje", "StdOst") ], 4)
## Podjetje StdOst
## 5127 PL630774290 3.329522
## 4222 LV000344720 3.256596
## 8123 GB02863246 3.235049
## 3669 IE174196 3.225632
head(podatki[order(-podatki$CooksD), c("Podjetje", "CooksD") ], 4)
## Podjetje CooksD
## 3552 IS4301710469 0.009364964
## 3551 IS4203697789 0.009342747
## 3555 IS4707043060 0.005844427
## 3741 IE387390 0.004948505
podatki <- podatki[c(-3552, -3551), ]
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
##
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
##
## mutate
ocena_povp <- summarySE(podatki,
measurevar="Dolg", #Spremenljivka, za katero računamo povprečje
groupvars=c("PanogaF"), #Enote razdelimo glede na faktor
conf.interval=0.95) #95-% interval zaupanja
library(ggplot2)
ggplot(ocena_povp, aes(x=PanogaF, y=Dolg)) +
geom_bar(position = position_dodge(),
stat = "identity") +
geom_errorbar(aes(ymin = Dolg-ci, ymax = Dolg+ci),
width = 0.1,
position = position_dodge(.9)) +
theme_linedraw()
library(Rmisc)
ocena_povp <- summarySE(podatki,
measurevar="Dolg",
groupvars=c("DržavaF"),
conf.interval=0.95)
library(ggplot2)
ggplot(ocena_povp, aes(x=DržavaF, y=Dolg)) +
geom_bar(position=position_dodge(),
stat="identity") +
geom_errorbar(aes(ymin=Dolg-ci, ymax=Dolg+ci),
width=0.1,
position=position_dodge(.9)) +
theme_linedraw()
library(lme4)
library(lmerTest)
hierar_fit0 <- lmer(Dolg ~ (1 | Panoga) + (1 | Država), #Slučajna reg. konstanta
REML = FALSE, #Metoda ML
data = podatki)
summary(hierar_fit0)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: Dolg ~ (1 | Panoga) + (1 | Država)
## Data: podatki
##
## AIC BIC logLik deviance df.resid
## 77435.5 77463.9 -38713.8 77427.5 8771
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0583 -0.7629 -0.0648 0.6574 3.8909
##
## Random effects:
## Groups Name Variance Std.Dev.
## Država (Intercept) 68.59 8.282
## Panoga (Intercept) 63.11 7.944
## Residual 390.89 19.771
## Number of obs: 8775, groups: Država, 25; Panoga, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 29.120 2.562 39.076 11.37 5.86e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ICC_panoga = 63.11/(63.11+68.59+390.89)
ICC_panoga
## [1] 0.1207639
ICC_država = 68.59/(63.11+68.59+390.89)
ICC_država
## [1] 0.1312501
hierar_fit1 <- lmer(Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) + (1 | Država),
REML = FALSE,
data = podatki)
anova(hierar_fit0, hierar_fit1, test="Chi")
## Data: podatki
## Models:
## hierar_fit0: Dolg ~ (1 | Panoga) + (1 | Država)
## hierar_fit1: Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) + (1 | Država)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## hierar_fit0 4 77436 77464 -38714 77428
## hierar_fit1 8 76661 76718 -38323 76645 782.12 4 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(hierar_fit1)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) +
## (1 | Država)
## Data: podatki
##
## AIC BIC logLik deviance df.resid
## 76661.4 76718.1 -38322.7 76645.4 8767
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5230 -0.7436 -0.0783 0.6425 4.2694
##
## Random effects:
## Groups Name Variance Std.Dev.
## Država (Intercept) 74.38 8.625
## Panoga (Intercept) 26.29 5.127
## Residual 357.91 18.918
## Number of obs: 8775, groups: Država, 25; Panoga, 18
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 16.134062 2.683512 94.191981 6.012 3.45e-08 ***
## Dobiček -0.230908 0.031160 8754.820441 -7.410 1.38e-13 ***
## LnSredstva 0.438839 0.153115 8762.421897 2.866 0.00417 **
## Oprijem 0.231763 0.008791 8384.640633 26.363 < 2e-16 ***
## DDFD.D. -1.497560 0.538320 8716.595377 -2.782 0.00542 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-
## ASCII chars
## (Intr) Dobičk LnSrds Oprijm
## Dobiček -0.093
## LnSredstva -0.562 0.037
## Oprijem -0.143 0.107 -0.019
## DDFD.D. 0.034 -0.001 -0.195 0.040
hierar_fit2 <- lmer(Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + Inflacija + Davek + (1 | Panoga) + (1 | Država),
REML = FALSE,
data = podatki)
anova(hierar_fit1, hierar_fit2, test="Chi")
## Data: podatki
## Models:
## hierar_fit1: Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) + (1 | Država)
## hierar_fit2: Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + Inflacija + Davek + (1 | Panoga) + (1 | Država)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## hierar_fit1 8 76661 76718 -38323 76645
## hierar_fit2 10 76665 76736 -38323 76645 0.3559 2 0.837