podatki <- read.table("/cloud/project/Poglavje 5/Zgled/Zadolženost.csv", header=TRUE, sep=";", dec=",")

head(podatki[5520:5522, ], 3)
##      Država Panoga  Podjetje     Dolg   Dobiček LnSredstva    Oprijem
## 5520     21      7 SI1216350 17.92704 -10.91115   8.985842 21.1809237
## 5521     21      7 SI1233378  0.00000  11.26977   8.785390  0.8033013
## 5522     21      7 SI1244329 39.43763  14.88862   8.991821 31.7729822
##      DD Inflacija Davek
## 5520  0       0.9    20
## 5521  0       0.9    20
## 5522  0       0.9    20

Opis spremenljivk:

podatki$PanogaF <- factor(podatki$Panoga,
                          levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
                          labels = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P","Q", "R", "S", "T", "U"))

podatki$DržavaF <- factor(podatki$Država,
                          levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
                          labels = c("B", "BG", "HR", "CZ", "EST", "FI", "FR", "D", "GR", "HU", "ISL", "IR", "I", "LAT", "LT", "LX","N", "PL", "PT", "SK", "SVN", "ES", "SEV", "CH", "GB"))

podatki$DDF <- factor(podatki$DD, 
                      levels = c(0, 1), 
                      labels = c("D.O.O.", "D.D."))
summary(podatki[c(-1, -2, -3, -8)])
##       Dolg          Dobiček          LnSredstva        Oprijem     
##  Min.   : 0.00   Min.   :-15.000   Min.   : 7.967   Min.   : 0.01  
##  1st Qu.:10.94   1st Qu.:  1.922   1st Qu.: 9.091   1st Qu.:11.65  
##  Median :28.31   Median :  4.404   Median : 9.831   Median :30.15  
##  Mean   :29.98   Mean   :  5.403   Mean   :10.274   Mean   :36.16  
##  3rd Qu.:45.80   3rd Qu.:  8.086   3rd Qu.:11.071   3rd Qu.:55.99  
##  Max.   :86.18   Max.   : 35.000   Max.   :15.496   Max.   :96.93  
##                                                                    
##    Inflacija          Davek          PanogaF        DržavaF    
##  Min.   :-1.700   Min.   :10.00   C      :2340   GB     :1691  
##  1st Qu.: 0.000   1st Qu.:22.00   G      :2273   D      :1392  
##  Median : 0.600   Median :25.00   F      : 714   ES     : 930  
##  Mean   : 1.154   Mean   :25.31   M      : 621   B      : 736  
##  3rd Qu.: 2.200   3rd Qu.:32.00   L      : 535   PL     : 476  
##  Max.   :12.000   Max.   :33.99   H      : 534   CH     : 387  
##                                   (Other):1760   (Other):3165  
##      DDF      
##  D.O.O.:5664  
##  D.D.  :3113  
##               
##               
##               
##               
## 

Začetna regresija

fit <- glm(Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + Inflacija + Davek,
           data = podatki)

summary(fit)
## 
## Call:
## glm(formula = Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + 
##     Inflacija + Davek, data = podatki)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -48.337  -16.626   -1.421   14.073   68.619  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.139528   1.837409   4.974 6.68e-07 ***
## Dobiček     -0.187980   0.033288  -5.647 1.68e-08 ***
## LnSredstva   0.612188   0.145329   4.212 2.55e-05 ***
## Oprijem      0.265129   0.007941  33.387  < 2e-16 ***
## DDFD.D.     -1.627060   0.482975  -3.369 0.000758 ***
## Inflacija   -0.279789   0.175111  -1.598 0.110128    
## Davek        0.271909   0.041171   6.604 4.23e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 425.227)
## 
##     Null deviance: 4314111  on 8776  degrees of freedom
## Residual deviance: 3729241  on 8770  degrees of freedom
## AIC: 78041
## 
## Number of Fisher Scoring iterations: 2
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:DescTools':
## 
##     Recode
vif(fit)
##    Dobiček LnSredstva    Oprijem        DDF  Inflacija      Davek 
##   1.034970   1.037910   1.043845   1.102006   1.549262   1.503731
mean(vif(fit))
## [1] 1.211954
podatki$StdOst <- rstandard(fit)
podatki$CooksD <- cooks.distance(fit)

hist(podatki$StdOst,
     main = "Histogram standardiziranih ostankov",
     ylab = "Frekvenca",
     xlab = "Standardizirani ostanki")

podatki$StdOcene <- scale(fit$fitted.values)

library(ggplot2)
ggplot(podatki, aes(y=StdOst, x=StdOcene)) +
  theme_linedraw() +
  geom_point() +
  xlab("Standardizirane ocenjene vrednosti") +
  ylab("Standardizirani ostanki")

head(podatki[order(podatki$StdOst), c("Podjetje", "StdOst") ], 4)
##          Podjetje    StdOst
## 1571  FR440220689 -2.345224
## 4076  ITVI0176757 -2.339666
## 536  BE0447029250 -2.321691
## 4011  ITSA0347093 -2.315026
head(podatki[order(-podatki$StdOst), c("Podjetje", "StdOst") ], 4)
##         Podjetje   StdOst
## 5127 PL630774290 3.329522
## 4222 LV000344720 3.256596
## 8123  GB02863246 3.235049
## 3669    IE174196 3.225632
head(podatki[order(-podatki$CooksD), c("Podjetje", "CooksD") ], 4)
##          Podjetje      CooksD
## 3552 IS4301710469 0.009364964
## 3551 IS4203697789 0.009342747
## 3555 IS4707043060 0.005844427
## 3741     IE387390 0.004948505
podatki <- podatki[c(-3552, -3551), ]
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
## 
##     mutate
ocena_povp <- summarySE(podatki, 
                        measurevar="Dolg", #Spremenljivka, za katero računamo povprečje
                        groupvars=c("PanogaF"), #Enote razdelimo glede na faktor
                        conf.interval=0.95) #95-% interval zaupanja

library(ggplot2)
ggplot(ocena_povp, aes(x=PanogaF, y=Dolg)) +
  geom_bar(position = position_dodge(), 
           stat = "identity") +
  geom_errorbar(aes(ymin = Dolg-ci, ymax = Dolg+ci),
               width = 0.1,                    
               position = position_dodge(.9)) +
  theme_linedraw()

library(Rmisc)
ocena_povp <- summarySE(podatki, 
                        measurevar="Dolg", 
                        groupvars=c("DržavaF"), 
                        conf.interval=0.95)

library(ggplot2)
ggplot(ocena_povp, aes(x=DržavaF, y=Dolg)) +
  geom_bar(position=position_dodge(), 
           stat="identity") +
  geom_errorbar(aes(ymin=Dolg-ci, ymax=Dolg+ci),
                width=0.1,                    
                position=position_dodge(.9)) +
  theme_linedraw()

library(lme4)
library(lmerTest)
hierar_fit0 <- lmer(Dolg ~ (1 | Panoga) + (1 | Država), #Slučajna reg. konstanta
                     REML = FALSE,  #Metoda ML
                     data = podatki)

summary(hierar_fit0)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: Dolg ~ (1 | Panoga) + (1 | Država)
##    Data: podatki
## 
##      AIC      BIC   logLik deviance df.resid 
##  77435.5  77463.9 -38713.8  77427.5     8771 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0583 -0.7629 -0.0648  0.6574  3.8909 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Država   (Intercept)  68.59    8.282  
##  Panoga   (Intercept)  63.11    7.944  
##  Residual             390.89   19.771  
## Number of obs: 8775, groups:  Država, 25; Panoga, 18
## 
## Fixed effects:
##             Estimate Std. Error     df t value Pr(>|t|)    
## (Intercept)   29.120      2.562 39.076   11.37 5.86e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ICC_panoga = 63.11/(63.11+68.59+390.89)
ICC_panoga
## [1] 0.1207639
ICC_država = 68.59/(63.11+68.59+390.89)
ICC_država
## [1] 0.1312501
hierar_fit1 <- lmer(Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) + (1 | Država), 
                     REML = FALSE,  
                     data = podatki)

anova(hierar_fit0, hierar_fit1, test="Chi")
## Data: podatki
## Models:
## hierar_fit0: Dolg ~ (1 | Panoga) + (1 | Država)
## hierar_fit1: Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) + (1 | Država)
##             npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## hierar_fit0    4 77436 77464 -38714    77428                         
## hierar_fit1    8 76661 76718 -38323    76645 782.12  4  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(hierar_fit1)
## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) +  
##     (1 | Država)
##    Data: podatki
## 
##      AIC      BIC   logLik deviance df.resid 
##  76661.4  76718.1 -38322.7  76645.4     8767 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5230 -0.7436 -0.0783  0.6425  4.2694 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Država   (Intercept)  74.38    8.625  
##  Panoga   (Intercept)  26.29    5.127  
##  Residual             357.91   18.918  
## Number of obs: 8775, groups:  Država, 25; Panoga, 18
## 
## Fixed effects:
##                Estimate  Std. Error          df t value Pr(>|t|)    
## (Intercept)   16.134062    2.683512   94.191981   6.012 3.45e-08 ***
## Dobiček       -0.230908    0.031160 8754.820441  -7.410 1.38e-13 ***
## LnSredstva     0.438839    0.153115 8762.421897   2.866  0.00417 ** 
## Oprijem        0.231763    0.008791 8384.640633  26.363  < 2e-16 ***
## DDFD.D.       -1.497560    0.538320 8716.595377  -2.782  0.00542 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-
## ASCII chars
##            (Intr) Dobičk LnSrds Oprijm
## Dobiček    -0.093                     
## LnSredstva -0.562  0.037              
## Oprijem    -0.143  0.107 -0.019       
## DDFD.D.     0.034 -0.001 -0.195  0.040
hierar_fit2 <- lmer(Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + Inflacija + Davek + (1 | Panoga) + (1 | Država), 
                     REML = FALSE,  
                     data = podatki)
anova(hierar_fit1, hierar_fit2, test="Chi")
## Data: podatki
## Models:
## hierar_fit1: Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + (1 | Panoga) + (1 | Država)
## hierar_fit2: Dolg ~ Dobiček + LnSredstva + Oprijem + DDF + Inflacija + Davek + (1 | Panoga) + (1 | Država)
##             npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)
## hierar_fit1    8 76661 76718 -38323    76645                     
## hierar_fit2   10 76665 76736 -38323    76645 0.3559  2      0.837