SWAN SHBG

Author

Amaru Agüero Jiménez

1 Library

Code
# Load necessary packages
install_and_load_packages <- function(packages) {
  for (package in packages) {
    # Comprueba si el paquete está instalado
    if (!require(package, character.only = TRUE)) {
      # Instala el paquete si no está instalado
      install.packages(package)
      # Carga el paquete después de instalarlo
      library(package, character.only = TRUE)
    }
  }
}

necessary_packages <- c("readstata13", "knitr", "tidyverse", "naniar", "writexl", "rcompanion", "performance","caret", "psych", "kableExtra", "stargazer","ggpubr","metan")

install_and_load_packages(necessary_packages)

opts_chunk$set(
  warning = FALSE,
  message = FALSE
)

#IMPORT DATA 
SWAN_USA_VISIT10_06_08 <- read.dta13(paste0(gsub("/docs", "", getwd()), "/ICPSR_32961/DS0001/32961-0001-Data.dta"))

SWANVISIT10<-SWAN_USA_VISIT10_06_08%>% select(SHBG10, #Sex hormone-binding globulin (nM)
                                              T10, #Testosterone (ng/dL)
                                              FSH10, #Follicle-stimulating hormone (mIU/mL)
                                              E2AVE10, #Estradiol (average, pg/mL)
                                              HPBMDT10, #Total Hip bone mineral density (BMD) with cross-calibration applied
                                              TSH10, #Thyroid-stimulating hormone (uIU/mL
                                              SKELMM10,
                                              FFMNHAN10,
                                              BMI10,
                                              AGE10,
                                              DHAS10,
                                              QLTYLIF10,
                                              PBFBIA10,
                                              #Dehydroepiandrosterone sulfate (ug/dL) 
                                              STATUS10, #Menopausal status: label: 1 Hysterectomy/both ovaries removed;2 Post-menopausal; 3 Late Peri;4 Early Peri;5 Pre-menopausal; 6 Pregnant/breastfeeding; 7 Unknown due to HT use; 8 Unknown due to hysterectomy 
                                              STEROI110,
                                              ARTHRT110,
                                              OSTEOAR10,
                                              OSTEOPR10,
                                              V_ACTI10,
                                              EXERCIS10) 

2 Variables.

  • SHBG10: Sex hormone-binding globulin (nM)

  • T10: Testosterone (ng/dL)

  • FSH10: Follicle-stimulating hormone (mIU/mL)

  • E2AVE10: Estradiol (average, pg/mL)

  • HPBMDT10: Total Hip bone mineral density (BMD) with cross-calibration applied

  • TSH10: Thyroid-stimulating hormone (uIU/mL

  • BMI10: Body mass index

  • SKELMM10: Skeletal Muscle Mass (Janssen equation)

  • FFMNHAN10: Fat free mass (NHANES III data/RJL Systems equation)

3 Continuous variable histogram.

Code
ggplot(gather(SWANVISIT10[,1:13]), aes(value)) + 
  geom_histogram(bins = 100) + 
  facet_wrap(~key, scales = 'free')+theme_minimal()

Code
SWANVISIT10_mod <- SWANVISIT10
colnames(SWANVISIT10_mod) <- gsub("10$", "", colnames(SWANVISIT10_mod))

# Primero transformamos los datos a formato largo
datos_largos <- gather(SWANVISIT10_mod[,1:13], key = "key", value = "value")

# Calculamos estadísticas por grupo
stats <- datos_largos %>%
  group_by(key) %>%
  summarise(mean = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE),
            median = median(value, na.rm = TRUE),
            min = min(value, na.rm = TRUE),
            max = max(value, na.rm = TRUE))

# Ahora hacemos el gráfico
ggplot(datos_largos, aes(value)) +
  geom_histogram(bins = 100, fill = "steelblue", color = "white") +
  facet_wrap(~key, scales = 'free') +
  theme_minimal() +
  geom_text(data = stats, aes(x = Inf, y = Inf, 
            label = paste0("Mean: ", round(mean,2), "\n",
                           "SD: ", round(sd,2), "\n",
                           "Median: ", round(median,2), "\n",
                           "Min: ", round(min,2), "\n",
                           "Max: ", round(max,2))),
            hjust = 1.1, vjust = 1.1, size = 3.2, color = "black")

4 “r” Spearman correlation for continuous variables.

  • “r” > 0: direct relation. “r” < 0: inverse relation. “r” = 0: no relation.

  • -0.25<“r”<0.25: bad predictor

  • -0.5<“r”<-0.25 or 0.25<“r”<0.5: poor predictor

  • -0.5<“r”<-0.75 or 0.5<“r”<0.75: good predictor

  • “r”<-0.75 or “r”>0.75: excellent predictor.

Code
pairs.panels(SWANVISIT10[,1:13],
             smooth = TRUE,      # If TRUE, draws loess smooths
             scale = FALSE,      # If TRUE, scales the correlation text font
             density = TRUE,     # If TRUE, adds density plots and histograms
             ellipses = TRUE,    # If TRUE, draws ellipses
             method = "spearman", # Correlation method (also "spearman" or "kendall")
             pch = 21,           # pch symbol
             lm = FALSE,         # If TRUE, plots linear fit rather than the LOESS (smoothed) fit
             cor = TRUE,         # If TRUE, reports correlations
             jiggle = FALSE,     # If TRUE, data points are jittered
             factor = 2,         # Jittering factor
             hist.col = "grey70",       # Histograms color
             stars = TRUE,       # If TRUE, adds significance level with stars
             ci = TRUE)          # If TRUE, adds confidence intervals

Code
# Crear un nuevo dataframe con los nombres de columnas modificados
SWANVISIT10_mod <- SWANVISIT10
colnames(SWANVISIT10_mod) <- gsub("10$", "", colnames(SWANVISIT10_mod))

all <- corr_coef(SWANVISIT10_mod[,1:13], method = "spearman")
plot(all)

Code
png("correlation_plot.png", width = 4000, height =3000, res = 600) 
plot(all)
dev.off()
png 
  2 

*: p value < 0.05

**: p value < 0.01

***: p value < 0.001

5 Generalized linear regression models (GLM) for dependent variable: SHBG.

5.1 Independent variable: Hormones + AGE + STATUS.

5.1.1 Gamma models:

  • SHBGM1: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SHBGM1.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10
  • SHBGM2: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SHBGM2.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10

5.1.2 Gaussian models:

  • SHBGM4: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SHBGM4.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10
  • SHBGM5: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SHBGM5.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10
  • SHBGM6: 1/SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10 + STATUS10
Code
#Dependent variable: SHBG#######################################################
#GAMMA IDENTITY ################################################################
SHBGM1 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10, data = SWANVISIT10, 
               Gamma(link = "identity"))
SHBGM1.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + STATUS10, data = SWANVISIT10, 
               Gamma(link = "identity"))
#GAMMA LOG #####################################################################
SHBGM2 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))
SHBGM2.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))
#GAMMA INVERSE NO RUN ##########################################################
# SHBGM3 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10 , data = SWANVISIT10,
#               Gamma(link = "inverse"))
# SHBGM3.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10 , data = SWANVISIT10,Gamma(link = "inverse"))
#GAUSSIAN IDENTITY #############################################################
SHBGM4 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10, data = SWANVISIT10, 
               gaussian(link = "identity"))
SHBGM4.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + STATUS10, data = SWANVISIT10, 
               gaussian(link = "identity"))
#GAUSSIAN LOG ##################################################################
SHBGM5 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10 , data = SWANVISIT10, 
               gaussian(link = "log"))
SHBGM5.2 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + DHAS10 + STATUS10 , data = SWANVISIT10, 
               gaussian(link = "log"))
#GAUSSIAN INVERSE  #############################################################
SHBGM6 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10 , data = SWANVISIT10,
              gaussian(link = "inverse"))
table1<-compare_performance(SHBGM1, SHBGM1.2, SHBGM2, SHBGM2.2, SHBGM4, SHBGM4.2, SHBGM5, SHBGM5.2, SHBGM6, rank = TRUE, verbose = FALSE)

5.1.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
SHBGM1 glm 25.55817 0.5639279 NA 0.1237001 0.9691331 0.9687881 0.2429628 0.8487503
SHBGM1.2 25.57468 0.5643538 0.1230304 0.0118307 0.0121790 0.7450047 0.5839205
SHBGM2 26.19880 0.5637168 0.1195765 0.0189204 0.0189137 0.0047434 0.2157610
SHBGM4 25.49849 25.5879749 0.1137367 NA 0.0000000 0.0000000 0.0000000 0.2032982
SHBGM2.2 26.22299 0.5639744 NA 0.1181792 0.0001158 0.0001192 0.0072892 0.2020033
SHBGM4.2 25.53403 25.6097516 0.1121234 NA 0.0000000 0.0000000 0.0000000 0.1933162
SHBGM5 25.64236 25.7324923 0.1037074 0.1624466
SHBGM5.2 25.65106 25.7342637 0.1030997 0.1600334
SHBGM6 25.91678 26.0075678 0.0844214 0.0845323
Code
plot(compare_performance(SHBGM1, SHBGM1.2, SHBGM2, SHBGM2.2, SHBGM4, SHBGM4.2, SHBGM5, SHBGM5.2, SHBGM6, rank = TRUE, verbose = FALSE))

5.1.4 Result for top 4 models: SHBGM1, SHBGM1.2, SHBGM2, SHBGM4

Code
stargazer(SHBGM1,SHBGM1.2, SHBGM2, SHBGM4,type = "html", title="Results")
Results
Dependent variable:
SHBG10
glm: Gamma glm: Gamma normal
link = identity link = log
(1) (2) (3) (4)
T10 -0.002 -0.0004 -0.025*
(0.006) (0.0003) (0.014)
FSH10 0.143*** 0.143*** 0.003*** 0.125***
(0.013) (0.013) (0.0003) (0.012)
E2AVE10 0.194*** 0.190*** 0.003*** 0.182***
(0.024) (0.024) (0.0004) (0.017)
TSH10 -0.366*** -0.370*** -0.013*** -0.393*
(0.093) (0.093) (0.005) (0.216)
AGE10 0.047 -0.001 -0.082
(0.228) (0.005) (0.240)
DHAS10 -0.041*** -0.041*** -0.001*** -0.049***
(0.007) (0.007) (0.0002) (0.008)
STATUS10Post-menopausal 2.976 2.450 0.044 1.081
(2.328) (2.336) (0.057) (2.595)
STATUS10Late perimenopause -4.032 -4.571 -0.106 -5.171
(3.037) (3.042) (0.077) (3.512)
STATUS10Early perimenopause 6.149* 5.586* 0.103 3.442
(3.263) (3.244) (0.077) (3.500)
STATUS10Pre-menopausal 11.762 11.192 0.179 4.577
(11.117) (11.118) (0.238) (10.796)
STATUS10Unknown due to hormones (HT) use 17.031*** 16.567*** 0.293** 11.490**
(6.173) (6.182) (0.114) (5.194)
STATUS10Unknown due to hysterectomy -2.569 -3.247 -0.110 -6.483
(3.829) (3.842) (0.098) (4.431)
Constant 24.611* 27.792*** 3.582*** 38.223***
(13.195) (2.783) (0.307) (13.957)
Observations 1,862 1,863 1,862 1,862
Log Likelihood -8,402.453 -8,408.859 -8,406.389 -8,673.373
Akaike Inf. Crit. 16,830.910 16,839.720 16,838.780 17,372.750
Note: p<0.1; p<0.05; p<0.01

5.1.5 Check best model: SHBGM1

Code
check_model(SHBGM1)

5.2 Independent variable: Hormones + SKELMM + AGE + STATUS.

5.2.1 Gamma models:

  • SHBGM7: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + SKELMM10 + DHAS10 + STATUS10
  • SHBGM7.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + STATUS10
  • SHBGM8: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + SKELMM10 + DHAS10 + STATUS10
  • SHBGM8.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + SKELMM10 + DHAS10 + STATUS10

5.2.2 Gaussian models:

  • SHBGM9: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + SKELMM10 + DHAS10 + STATUS10
  • SHBGM9.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + SKELMM10 + DHAS10 + STATUS10
  • SHBGM10: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + SKELMM10 + DHAS10 + STATUS10
  • SHBGM10.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + SKELMM10 + DHAS10 + STATUS10
  • SHBGM11: 1/SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + SKELMM10 + DHAS10 + STATUS10 + STATUS10
  • SHBGM11.2: 1/SHBG10 = FSH10 + E2AVE10 + AGE10 + SKELMM10 + DHAS10 + STATUS10
Code
#Dependent variable: SHBG#######################################################
#GAMMA IDENTITY ################################################################
SHBGM7 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + STATUS10, data = SWANVISIT10, 
               Gamma(link = "identity"))
SHBGM7.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + SKELMM10 + STATUS10, data = SWANVISIT10, 
               Gamma(link = "identity"))
#GAMMA LOG #####################################################################
SHBGM8 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))
SHBGM8.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))
#GAMMA INVERSE NO RUN ##########################################################
# SHBGM3 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10+ STATUS10 , data = SWANVISIT10,
#               Gamma(link = "inverse"))
# SHBGM3.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10  + STATUS10 , data = SWANVISIT10,Gamma(link = "inverse"))
#GAUSSIAN IDENTITY #############################################################
SHBGM9 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + STATUS10, data = SWANVISIT10, 
               gaussian(link = "identity"))
SHBGM9.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + SKELMM10 + STATUS10, data = SWANVISIT10, 
               gaussian(link = "identity"))
#GAUSSIAN LOG ##################################################################
SHBGM10 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + STATUS10 , data = SWANVISIT10, 
               gaussian(link = "log"))
SHBGM10.2 <- glm(SHBG10 ~ FSH10 + E2AVE10  + DHAS10 + SKELMM10 + STATUS10 , data = SWANVISIT10, 
               gaussian(link = "log"))
#GAUSSIAN INVERSE  #############################################################
SHBGM11 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + STATUS10, data = SWANVISIT10,
              gaussian(link = "inverse"))
SHBGM11.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + AGE10 + DHAS10 + SKELMM10 + STATUS10 , data = SWANVISIT10,
              gaussian(link = "inverse"))

table1<-compare_performance(SHBGM7, SHBGM7.2, SHBGM8, SHBGM8.2, SHBGM9, SHBGM9.2, SHBGM10, SHBGM10.2, SHBGM11, SHBGM11.2, rank = TRUE, verbose = FALSE)

5.2.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
SHBGM8 glm 25.74648 0.5621445 NA 0.1559771 0.9390715 0.9387826 0.3134770 0.7906214
SHBGM8.2 25.80204 0.5625276 0.1547236 0.0072428 0.0074956 0.5544601 0.4665592
SHBGM7.2 25.35371 0.5626263 0.1529878 0.0014975 0.0015498 0.1146417 0.4193973
SHBGM7 25.33473 0.5614943 0.1527949 0.0521881 0.0521720 0.0174212 0.4107446
SHBGM9 25.26484 25.3694526 0.1454378 NA 0.0000000 0.0000000 0.0000000 0.2060437
SHBGM9.2 25.29870 25.3883545 0.1440060 0.1972878
SHBGM10 25.38139 25.4865398 0.1375352 0.1754976
SHBGM11 25.63599 25.7420578 0.1201456 0.1087726
SHBGM10.2 25.83288 25.9155890 0.1266110 0.0573600
SHBGM11.2 26.05153 26.1424654 0.1117640 0.0000000
Code
plot(compare_performance(SHBGM7, SHBGM7.2, SHBGM8, SHBGM8.2, SHBGM9, SHBGM9.2, SHBGM10, SHBGM10.2, SHBGM11, SHBGM11.2, rank = TRUE, verbose = FALSE))

5.2.4 Result for top 4 models: SHBGM8, SHBGM8.2, SHBGM7.2, SHBGM7

Code
stargazer(SHBGM8,SHBGM8.2, SHBGM7.2, SHBGM7,type = "html", title="Results")
Results
Dependent variable:
SHBG10
glm: Gamma glm: Gamma
link = log link = identity
(1) (2) (3) (4)
T10 -0.0003 0.003
(0.0003) (0.002)
FSH10 0.002*** 0.002*** 0.108*** 0.108***
(0.0003) (0.0003) (0.014) (0.014)
E2AVE10 0.003*** 0.003*** 0.183*** 0.185***
(0.0004) (0.0004) (0.025) (0.025)
TSH10 -0.013*** -0.013*** -0.391*** -0.394***
(0.005) (0.005) (0.091) (0.090)
AGE10 -0.007 -0.129
(0.006) (0.240)
DHAS10 -0.001*** -0.001*** -0.039*** -0.040***
(0.0002) (0.0002) (0.007) (0.007)
SKELMM10 -0.035*** -0.035*** -1.199*** -1.221***
(0.005) (0.004) (0.171) (0.172)
STATUS10Post-menopausal 0.015 -0.004 0.908 1.514
(0.060) (0.059) (2.431) (2.423)
STATUS10Late perimenopause -0.114 -0.116 -5.205* -4.962
(0.080) (0.080) (3.133) (3.125)
STATUS10Early perimenopause 0.068 0.072 4.027 4.177
(0.080) (0.079) (3.387) (3.410)
STATUS10Pre-menopausal 0.122 0.134 9.213 9.402
(0.260) (0.260) (13.500) (13.493)
STATUS10Unknown due to hormones (HT) use 0.245** 0.246** 13.738** 13.689**
(0.125) (0.125) (6.817) (6.784)
STATUS10Unknown due to hysterectomy -0.144 -0.153 -4.809 -4.146
(0.102) (0.102) (3.930) (3.910)
Constant 4.711*** 4.341*** 57.598*** 64.721***
(0.349) (0.125) (5.214) (14.956)
Observations 1,701 1,702 1,702 1,701
Log Likelihood -7,663.906 -7,670.771 -7,672.347 -7,666.796
Akaike Inf. Crit. 15,355.810 15,365.540 15,368.690 15,361.590
Note: p<0.1; p<0.05; p<0.01

5.2.5 Check best model: SHBGM8

Code
check_model(SHBGM8)

5.3 Independent variable: Hormones + FFMNHAN + AGE + STATUS.

5.3.1 Gamma models:

  • SHBGM12: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + FFMNHAN10 + DHAS10 + STATUS10
  • SHBGM12.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + FFMNHAN10 + STATUS10
  • SHBGM13: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + FFMNHAN10 + DHAS10 + STATUS10
  • SHBGM13.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + FFMNHAN10 + DHAS10 + STATUS10

5.3.2 Gaussian models:

  • SHBGM14: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + FFMNHAN10 + DHAS10 + STATUS10
  • SHBGM14.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + FFMNHAN10 + DHAS10 + STATUS10
  • SHBGM15: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + FFMNHAN10 + DHAS10 + STATUS10
  • SHBGM15.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + FFMNHAN10 + DHAS10 + STATUS10
  • SHBGM16: 1/SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + FFMNHAN10 + DHAS10 + STATUS10 + STATUS10
  • SHBGM16.2: 1/SHBG10 = FSH10 + E2AVE10 + AGE10 + FFMNHAN10 + DHAS10 + STATUS10
Code
#Dependent variable: SHBG#######################################################
#GAMMA IDENTITY ################################################################
SHBGM12 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10, data = SWANVISIT10, 
               Gamma(link = "identity"))
SHBGM12.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + FFMNHAN10 + STATUS10, data = SWANVISIT10, 
               Gamma(link = "identity"))
#GAMMA LOG #####################################################################
SHBGM13 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))
SHBGM13.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + FFMNHAN10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))
#GAMMA INVERSE NO RUN ##########################################################
# SHBGM3 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10+ STATUS10 , data = SWANVISIT10,
#               Gamma(link = "inverse"))
# SHBGM3.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + FFMNHAN10  + STATUS10 , data = SWANVISIT10,Gamma(link = "inverse"))
#GAUSSIAN IDENTITY #############################################################
SHBGM14 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10, data = SWANVISIT10, 
               gaussian(link = "identity"))
SHBGM14.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + FFMNHAN10 + STATUS10, data = SWANVISIT10, 
               gaussian(link = "identity"))
#GAUSSIAN LOG ##################################################################
SHBGM15 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10 , data = SWANVISIT10, 
               gaussian(link = "log"))
SHBGM15.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + DHAS10 + FFMNHAN10 + STATUS10 , data = SWANVISIT10, 
               gaussian(link = "log"))
#GAUSSIAN INVERSE  #############################################################
SHBGM16 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10, data = SWANVISIT10,
              gaussian(link = "inverse"))
SHBGM16.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10 , data = SWANVISIT10,
              gaussian(link = "inverse"))

table1<-compare_performance(SHBGM12, SHBGM12.2, SHBGM13, SHBGM13.2, SHBGM14, SHBGM14.2, SHBGM15, SHBGM15.2, SHBGM16, SHBGM16.2, rank = TRUE, verbose = FALSE)

5.3.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
SHBGM13 glm 25.25284 0.5544585 NA 0.1817604 0.9878346 0.9874295 0.2699253 0.7749823
SHBGM13.2 25.29915 0.5546310 0.1809853 0.0116426 0.0120477 0.7295692 0.4919606
SHBGM12 25.02009 0.5554579 0.1736901 0.0005170 0.0005168 0.0001413 0.3706974
SHBGM12.2 25.04533 0.5559367 0.1728606 0.0000058 0.0000060 0.0003643 0.3630154
SHBGM14 24.92111 25.0243077 0.1685319 NA 0.0000000 0.0000000 0.0000000 0.2052254
SHBGM14.2 24.94851 25.0369257 0.1675395 0.1969486
SHBGM15 24.94932 25.0526548 0.1666486 0.1965806
SHBGM16 25.15164 25.2558629 0.1530783 0.1345761
SHBGM15.2 25.42343 25.5048546 0.1540781 0.0514706
SHBGM16.2 25.59118 25.6807870 0.1428780 0.0000000
Code
plot(compare_performance(SHBGM12, SHBGM12.2, SHBGM13, SHBGM13.2, SHBGM14, SHBGM14.2, SHBGM15, SHBGM15.2, SHBGM16, SHBGM16.2, rank = TRUE, verbose = FALSE))

5.3.4 Result for top 4 models: SHBGM13, SHBGM13.2, SHBGM12, SHBGM12.2

Code
stargazer(SHBGM13,SHBGM13.2, SHBGM12, SHBGM12.2,type = "html", title="Results")
Results
Dependent variable:
SHBG10
glm: Gamma glm: Gamma
link = log link = identity
(1) (2) (3) (4)
T10 -0.0002 -0.009
(0.0003) (0.010)
FSH10 0.002*** 0.002*** 0.090*** 0.091***
(0.0003) (0.0003) (0.014) (0.014)
E2AVE10 0.003*** 0.003*** 0.185*** 0.182***
(0.0004) (0.0004) (0.025) (0.025)
TSH10 -0.014*** -0.014*** -0.425*** -0.421***
(0.005) (0.005) (0.091) (0.090)
AGE10 -0.006 -0.148
(0.005) (0.235)
DHAS10 -0.001*** -0.001*** -0.038*** -0.038***
(0.0002) (0.0002) (0.007) (0.007)
FFMNHAN10 -0.020*** -0.020*** -0.688*** -0.688***
(0.002) (0.002) (0.076) (0.075)
STATUS10Post-menopausal 0.00001 -0.017 0.838 0.184
(0.059) (0.059) (2.379) (2.384)
STATUS10Late perimenopause -0.121 -0.121 -5.477* -5.726*
(0.079) (0.079) (3.062) (3.066)
STATUS10Early perimenopause 0.018 0.023 1.521 1.465
(0.079) (0.078) (3.361) (3.334)
STATUS10Pre-menopausal 0.068 0.079 6.315 6.447
(0.256) (0.256) (13.152) (13.141)
STATUS10Unknown due to hormones (HT) use 0.196 0.198 10.780 10.822
(0.123) (0.123) (6.567) (6.603)
STATUS10Unknown due to hysterectomy -0.144 -0.151 -4.078 -4.568
(0.100) (0.100) (3.858) (3.869)
Constant 4.986*** 4.626*** 75.233*** 67.124***
(0.344) (0.127) (14.645) (5.281)
Observations 1,701 1,702 1,701 1,702
Log Likelihood -7,640.045 -7,646.486 -7,647.600 -7,654.088
Akaike Inf. Crit. 15,308.090 15,316.970 15,323.200 15,332.180
Note: p<0.1; p<0.05; p<0.01

5.3.5 Check best model: SHBGM13

Code
check_model(SHBGM13)

6 Generalized linear regression models (GLM) for dependent variable: SKELMM.

6.1 Independent variable: Hormones + AGE + STATUS.

6.1.1 Gamma models:

  • SKELMMM1: SKELMM10 = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM1.2: SKELMM10 = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM2: log(SKELMM10) = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM2.2 log(SKELMM10) = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10

6.1.2 Gaussian models:

  • SKELMMM3: SKELMM10 = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM3.2: SKELMM10 = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM4: log(SKELMM10) = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM4.2 log(SKELMM10) = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
Code
#GAMMA IDENTITY ################################################################
SKELMMM1 <- glm(SKELMM10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + TSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
SKELMMM1.2 <- glm(SKELMM10 ~ SHBG10 + T10 + FSH10  + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
#GAMMA LOG #####################################################################
SKELMMM2 <- glm(SKELMM10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
SKELMMM2.2 <- glm(SKELMM10 ~ SHBG10 + T10  + FSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
#GAUSSIAN IDENTITY #############################################################
SKELMMM3 <- glm(SKELMM10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "identity"))
SKELMMM3.2 <- glm(SKELMM10 ~ SHBG10 + E2AVE10 + FSH10 + AGE10 + DHAS10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "identity"))
#GAUSSIAN LOG ##################################################################
SKELMMM4 <- glm(SKELMM10 ~ SHBG10 + E2AVE10 + T10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "log"))
SKELMMM4.2 <- glm(SKELMM10 ~ SHBG10 + E2AVE10 + FSH10 + AGE10 + DHAS10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "log"))

table1<-compare_performance(SKELMMM1, SKELMMM1.2, SKELMMM2, SKELMMM2.2, SKELMMM3, SKELMMM3.2, SKELMMM4, SKELMMM4.2, rank = TRUE, verbose = FALSE)

6.1.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
SKELMMM2 glm 2.967064 0.1455971 NA 0.1568555 0.9798527 0.9798527 0.9798527 0.9442015
SKELMMM2.2 2.960496 0.1452059 0.1601414 0.0000000 0.0000000 0.0000000 0.3905728
SKELMMM1.2 2.982190 0.1455598 0.1565211 0.2374752
SKELMMM1 2.987504 0.1459744 0.1529698 0.0201473 0.0201473 0.0201473 0.2122829
SKELMMM4 2.959160 2.9714143 0.1537564 NA 0.0000000 0.0000000 0.0000000 0.2011952
SKELMMM4.2 2.966478 2.9778746 0.1491518 0.1491043
SKELMMM3 2.966853 2.9791381 0.1493509 0.1463713
SKELMMM3.2 2.976970 2.9884045 0.1431227 0.0743324
Code
plot(compare_performance(SKELMMM1, SKELMMM1.2, SKELMMM2, SKELMMM2.2, SKELMMM3, SKELMMM3.2, SKELMMM4, SKELMMM4.2, rank = TRUE, verbose = FALSE))

6.1.4 Result for top 4 models: SKELMMM2, SKELMMM2.2, SKELMMM1.2, SKELMMM1

Code
stargazer(SKELMMM2, SKELMMM2.2, SKELMMM1.2, SKELMMM1, type = "html", title="Results")
Results
Dependent variable:
SKELMM10
glm: Gamma glm: Gamma
link = log link = identity
(1) (2) (3) (4)
SHBG10 -0.001*** -0.001*** -0.020*** -0.020***
(0.0001) (0.0001) (0.003) (0.003)
T10 0.0003*** 0.0004*** 0.014*** 0.014***
(0.0001) (0.0001) (0.003) (0.003)
E2AVE10 -0.00003 0.00005
(0.0001) (0.002)
FSH10 -0.001*** -0.001*** -0.012*** -0.012***
(0.0001) (0.0001) (0.001) (0.001)
DHAS10 -0.0001*** -0.0001*** -0.003*** -0.004***
(0.00005) (0.00005) (0.001) (0.001)
AGE10 -0.006*** -0.006*** -0.115*** -0.116***
(0.001) (0.001) (0.029) (0.029)
TSH10 -0.0004 -0.003
(0.001) (0.027)
STATUS10Post-menopausal -0.048*** -0.050*** -1.070*** -1.037***
(0.015) (0.015) (0.325) (0.328)
STATUS10Late perimenopause -0.012 -0.015 -0.331 -0.271
(0.021) (0.020) (0.435) (0.443)
STATUS10Early perimenopause -0.060*** -0.063*** -1.173*** -1.140***
(0.021) (0.020) (0.426) (0.437)
STATUS10Pre-menopausal -0.188*** -0.191*** -3.518*** -3.495***
(0.067) (0.067) (1.260) (1.265)
STATUS10Unknown due to hormones (HT) use -0.035 -0.037 -0.705 -0.646
(0.032) (0.031) (0.655) (0.676)
STATUS10Unknown due to hysterectomy -0.014 -0.016 -0.324 -0.284
(0.026) (0.026) (0.552) (0.561)
Constant 3.512*** 3.505*** 29.772*** 29.829***
(0.084) (0.082) (1.663) (1.701)
Observations 1,701 1,728 1,728 1,701
Log Likelihood -4,201.117 -4,263.796 -4,267.487 -4,205.001
Akaike Inf. Crit. 8,430.233 8,551.591 8,558.974 8,438.002
Note: p<0.1; p<0.05; p<0.01

6.1.5 Check best model: SKELMMM2

Code
check_model(SKELMMM2)

6.2 Independent variable: Hormones + FFMNHAN + AGE + STATUS.

6.2.1 Gamma models:

  • SKELMMM5: SKELMM10 = SHBG10 + FFMNHAN10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM5.2: SKELMM10 = SHBG10 + FFMNHAN10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM6: log(SKELMM10) = SHBG10 + FFMNHAN10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM6.2 log(SKELMM10) = SHBG10 + FFMNHAN10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10

6.2.2 Gaussian models:

  • SKELMMM7: SKELMM10 = SHBG10 + FFMNHAN10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM7.2: SKELMM10 = SHBG10 + FFMNHAN10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM8: log(SKELMM10) = SHBG10 + FFMNHAN10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • SKELMMM8.2 log(SKELMM10) = SHBG10 + FFMNHAN10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
Code
#GAMMA IDENTITY ################################################################
SKELMMM5 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + T10 + E2AVE10 + FSH10 + TSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
SKELMMM5.2 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10  + E2AVE10 + TSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
#GAMMA LOG #####################################################################
SKELMMM6 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
SKELMMM6.2 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
#GAUSSIAN IDENTITY #############################################################
SKELMMM7 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "identity"))
SKELMMM7.2 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "identity"))
#GAUSSIAN LOG ##################################################################
SKELMMM8 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "log"))
SKELMMM8.2 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "log"))

table1<-compare_performance(SKELMMM5, SKELMMM5.2, SKELMMM6, SKELMMM6.2, SKELMMM7, SKELMMM7.2, SKELMMM8, SKELMMM8.2, rank = TRUE, verbose = FALSE)

6.2.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
SKELMMM5.2 glm 1.009532 0.0480083 NA 0.9054792 0.3976735 0.4065782 0.9934368 0.8616601
SKELMMM5 1.008331 0.0480016 0.9056463 0.6023265 0.5934218 0.0065632 0.7982040
SKELMMM7.2 1.007459 1.0116285 0.9018648 NA 0.0000000 0.0000000 0.0000000 0.2091031
SKELMMM7 1.007551 1.0120229 0.9018947 0.2086979
SKELMMM6 1.062431 0.0503618 NA 0.8958967 0.2029789
SKELMMM6.2 1.063395 0.0503789 0.8956991 0.1995291
SKELMMM8 1.052484 1.0571551 0.8929494 NA 0.0390977
SKELMMM8.2 1.053538 1.0575800 0.8927107 0.0352432
Code
plot(compare_performance(SKELMMM5, SKELMMM5.2, SKELMMM6, SKELMMM6.2, SKELMMM7, SKELMMM7.2, SKELMMM8, SKELMMM8.2, rank = TRUE, verbose = FALSE))

6.2.4 Result for top 4 models: SKELMMM5.2,SKELMMM5, SKELMMM7.2, SKELMMM7

Code
stargazer(SKELMMM5.2,SKELMMM5, SKELMMM7.2, SKELMMM7,type = "html", title="Results")
Results
Dependent variable:
SKELMM10
glm: Gamma normal
link = identity
(1) (2) (3) (4)
SHBG10 0.006*** 0.005*** 0.006*** 0.006***
(0.001) (0.001) (0.001) (0.001)
FFMNHAN10 0.426*** 0.427*** 0.428*** 0.428***
(0.004) (0.004) (0.004) (0.004)
T10 -0.0002 -0.0001
(0.001) (0.001)
E2AVE10 -0.002** -0.001* -0.001* -0.001*
(0.001) (0.001) (0.001) (0.001)
FSH10 0.001 0.001** 0.001**
(0.0005) (0.001) (0.001)
TSH10 0.019** 0.020** 0.019** 0.019**
(0.008) (0.008) (0.009) (0.009)
DHAS10 0.002*** 0.002*** 0.002*** 0.002***
(0.0003) (0.0003) (0.0003) (0.0003)
AGE10 -0.035*** -0.034*** -0.028*** -0.028***
(0.009) (0.009) (0.010) (0.010)
STATUS10Post-menopausal 0.062 0.073 0.051 0.061
(0.104) (0.105) (0.107) (0.108)
STATUS10Late perimenopause 0.147 0.157 0.146 0.154
(0.141) (0.141) (0.144) (0.145)
STATUS10Early perimenopause 0.604*** 0.641*** 0.658*** 0.665***
(0.140) (0.141) (0.145) (0.145)
STATUS10Pre-menopausal 0.060 0.123 0.044 0.051
(0.403) (0.405) (0.468) (0.468)
STATUS10Unknown due to hormones (HT) use 0.311 0.337 0.481** 0.490**
(0.215) (0.215) (0.225) (0.225)
STATUS10Unknown due to hysterectomy 0.147 0.154 0.104 0.112
(0.180) (0.180) (0.183) (0.183)
Constant 2.358*** 2.157*** 1.711*** 1.726***
(0.575) (0.594) (0.635) (0.635)
Observations 1,702 1,701 1,702 1,701
Log Likelihood -2,345.952 -2,343.537 -2,428.682 -2,427.410
Akaike Inf. Crit. 4,717.904 4,717.074 4,885.364 4,884.820
Note: p<0.1; p<0.05; p<0.01

6.2.5 Check best model: SKELMMM5.2

Code
check_model(SKELMMM5.2)

7 Generalized linear regression models (GLM) for dependent variable: FFMNHAN.

7.1 Independent variable: Hormones + AGE + STATUS.

7.1.1 Gamma models:

  • FFMNHANM1: FFMNHAN10 = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM1.2: FFMNHAN10 = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM2: log(FFMNHAN10) = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM2.2 log(FFMNHAN10) = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10

7.1.2 Gaussian models:

  • FFMNHANM3: FFMNHAN10 = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM3.2: FFMNHAN10 = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM4: log(FFMNHAN10) = SHBG10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM4.2 log(FFMNHAN10) = SHBG10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
Code
#GAMMA IDENTITY ################################################################
FFMNHANM1 <- glm(FFMNHAN10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + TSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
FFMNHANM1.2 <- glm(FFMNHAN10 ~ SHBG10 + T10 + FSH10  + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
#GAMMA LOG #####################################################################
FFMNHANM2 <- glm(FFMNHAN10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
FFMNHANM2.2 <- glm(FFMNHAN10 ~ SHBG10 + T10  + FSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
#GAUSSIAN IDENTITY #############################################################
FFMNHANM3 <- glm(FFMNHAN10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "identity"))
FFMNHANM3.2 <- glm(FFMNHAN10 ~ SHBG10 + T10  + FSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "identity"))
#GAUSSIAN LOG ##################################################################
FFMNHANM4 <- glm(FFMNHAN10 ~ SHBG10 + E2AVE10 + T10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "log"))
FFMNHANM4.2 <- glm(FFMNHAN10 ~ SHBG10 + T10  + FSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             gaussian(link = "log"))

table1<-compare_performance(FFMNHANM1, FFMNHANM1.2, FFMNHANM2, FFMNHANM2.2, FFMNHANM3, FFMNHANM3.2, FFMNHANM4, FFMNHANM4.2, rank = TRUE, verbose = FALSE)

7.1.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
FFMNHANM2 glm 6.524449 0.1424419 NA 0.2056103 0.9995289 0.9995289 0.9995289 0.9558712
FFMNHANM2.2 6.538665 0.1423373 0.2057558 0.0000000 0.0000000 0.0000000 0.3407568
FFMNHANM1 6.641838 0.1432200 0.1983762 0.0004711 0.0004711 0.0004711 0.2312961
FFMNHANM4 6.482955 6.5098056 0.2071929 NA 0.0000000 0.0000000 0.0000000 0.2011930
FFMNHANM1.2 6.671028 0.1429993 NA 0.1998176 0.1999793
FFMNHANM4.2 6.485863 6.5085103 0.2066599 NA 0.1981405
FFMNHANM3 6.521013 6.5480150 0.1978573 0.1595287
FFMNHANM3.2 6.522207 6.5449718 0.1977441 0.1583541
Code
plot(compare_performance(FFMNHANM1, FFMNHANM1.2, FFMNHANM2, FFMNHANM2.2, FFMNHANM3, FFMNHANM3.2, FFMNHANM4, FFMNHANM4.2, rank = TRUE, verbose = FALSE))

7.1.4 Result for top 4 models: FFMNHANM2, FFMNHANM2.2, FFMNHANM1.2, FFMNHANM1

Code
stargazer(FFMNHANM2, FFMNHANM2.2, FFMNHANM1, FFMNHANM4, type = "html", title="Results")
Results
Dependent variable:
FFMNHAN10
glm: Gamma glm: Gamma glm: gaussian
link = log link = identity link = log
(1) (2) (3) (4)
SHBG10 -0.001*** -0.001*** -0.060*** -0.001***
(0.0001) (0.0001) (0.006) (0.0001)
T10 0.0004*** 0.0004*** 0.041*** 0.0002***
(0.0001) (0.0001) (0.006) (0.0001)
E2AVE10 0.00004 0.004 0.00001
(0.0001) (0.005) (0.0001)
FSH10 -0.001*** -0.001*** -0.027*** -0.001***
(0.0001) (0.0001) (0.003) (0.0001)
DHAS10 -0.0002*** -0.0002*** -0.013*** -0.0002***
(0.00005) (0.00005) (0.002) (0.00005)
AGE10 -0.004*** -0.004*** -0.190*** -0.005***
(0.001) (0.001) (0.064) (0.001)
TSH10 -0.001 -0.051 -0.002
(0.001) (0.056) (0.001)
STATUS10Post-menopausal -0.053*** -0.056*** -2.652*** -0.052***
(0.015) (0.015) (0.726) (0.014)
STATUS10Late perimenopause -0.021 -0.023 -1.050 -0.021
(0.020) (0.020) (0.975) (0.019)
STATUS10Early perimenopause -0.095*** -0.097*** -4.104*** -0.104***
(0.020) (0.020) (0.951) (0.020)
STATUS10Pre-menopausal -0.196*** -0.196*** -7.845*** -0.212***
(0.066) (0.066) (2.786) (0.071)
STATUS10Unknown due to hormones (HT) use -0.061* -0.061** -2.640* -0.070**
(0.032) (0.031) (1.462) (0.031)
STATUS10Unknown due to hysterectomy -0.021 -0.019 -1.004 -0.020
(0.026) (0.025) (1.236) (0.024)
Constant 4.282*** 4.273*** 64.150*** 4.325***
(0.082) (0.080) (3.743) (0.082)
Observations 1,701 1,728 1,701 1,701
Log Likelihood -5,540.727 -5,629.799 -5,548.387 -5,594.084
Akaike Inf. Crit. 11,109.450 11,283.600 11,124.770 11,216.170
Note: p<0.1; p<0.05; p<0.01

7.1.5 Check best model: FFMNHANM2

Code
check_model(FFMNHANM2)

7.2 Independent variable: Hormones + SKELMM + AGE + STATUS.

7.2.1 Gamma models:

  • FFMNHANM5: FFMNHAN10 = SHBG10 + SKELMM10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM5.2: FFMNHAN10 = SHBG10 + SKELMM10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM6: log(FFMNHAN10) = SHBG10 + SKELMM10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM6.2 log(FFMNHAN10) = SHBG10 + SKELMM10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10

7.2.2 Gaussian models:

  • FFMNHANM7: FFMNHAN10 = SHBG10 + SKELMM10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM7.2: FFMNHAN10 = SHBG10 + SKELMM10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM8: log(FFMNHAN10) = SHBG10 + SKELMM10 + T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10
  • FFMNHANM8.2 log(FFMNHAN10) = SHBG10 + SKELMM10 + T10 + FSH10 + AGE10 + DHAS10 + STATUS10

7.2.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
FFMNHANM5 glm 2.221270 0.0474767 NA 0.9100796 1 1 1 0.9947347
FFMNHANM5.2 2.241853 0.0478859 0.9085427 0 0 0 0.3785673
FFMNHANM7 2.214552 2.2243819 0.9074890 NA 0.2168609
FFMNHANM6 2.448510 0.0495351 NA 0.9011267 0.2164476
FFMNHANM7.2 2.234349 2.2428014 0.9058488 NA 0.1997944
FFMNHANM6.2 2.469715 0.0499975 NA 0.8993587 0.1997879
FFMNHANM8 2.395417 2.4060529 0.8917610 NA 0.0598127
FFMNHANM8.2 2.415663 2.4248008 0.8899484 0.0423668
Code
plot(compare_performance(FFMNHANM5, FFMNHANM5.2, FFMNHANM6, FFMNHANM6.2, FFMNHANM7, FFMNHANM7.2, FFMNHANM8, FFMNHANM8.2, rank = TRUE, verbose = FALSE))

7.2.4 Result for top 4 models: FFMNHANM6, FFMNHANM6.2, FFMNHANM5.2, FFMNHANM5

Code
stargazer(FFMNHANM5, FFMNHANM5.2, FFMNHANM7, FFMNHANM6, type = "html", title="Results")
Results
Dependent variable:
FFMNHAN10
glm: Gamma normal glm: Gamma
link = identity link = log
(1) (2) (3) (4)
SHBG10 -0.018*** -0.015*** -0.019*** -0.0005***
(0.002) (0.002) (0.002) (0.00005)
SKELMM10 2.080*** 2.084*** 2.067*** 0.044***
(0.019) (0.019) (0.018) (0.0004)
T10 0.004*** 0.005*** 0.002 0.0001**
(0.001) (0.001) (0.001) (0.00003)
E2AVE10 0.003** 0.003* 0.0001**
(0.001) (0.002) (0.00003)
FSH10 -0.004*** -0.005*** -0.007*** -0.0001***
(0.001) (0.001) (0.001) (0.00003)
TSH10 -0.046*** -0.047** -0.001**
(0.018) (0.019) (0.0004)
DHAS10 -0.005*** -0.004*** -0.005*** -0.0001***
(0.001) (0.001) (0.001) (0.00002)
AGE10 0.051** 0.050** 0.035 0.001
(0.021) (0.021) (0.022) (0.0005)
STATUS10Post-menopausal -0.477** -0.524** -0.413* -0.008
(0.234) (0.235) (0.237) (0.005)
STATUS10Late perimenopause -0.466 -0.438 -0.427 -0.008
(0.315) (0.313) (0.318) (0.007)
STATUS10Early perimenopause -1.809*** -1.723*** -1.903*** -0.037***
(0.309) (0.305) (0.318) (0.007)
STATUS10Pre-menopausal -1.255 -1.178 -1.162 -0.029
(0.895) (0.902) (1.028) (0.023)
STATUS10Unknown due to hormones (HT) use -1.033** -0.939** -1.363*** -0.024**
(0.471) (0.463) (0.494) (0.011)
STATUS10Unknown due to hysterectomy -0.417 -0.267 -0.337 -0.006
(0.400) (0.400) (0.403) (0.009)
Constant 2.647** 2.493* 4.219*** 2.932***
(1.316) (1.305) (1.396) (0.031)
Observations 1,701 1,728 1,701 1,701
Log Likelihood -3,692.686 -3,767.220 -3,766.995 -3,773.347
Akaike Inf. Crit. 7,415.372 7,560.441 7,563.990 7,576.695
Note: p<0.1; p<0.05; p<0.01

7.2.5 Check best model: FFMNHANM5

Code
check_model(FFMNHANM5)

8 K-folds cross validation predictions for dependent variable: SHBG.

8.1 SHBGM1 = Hormones + AGE + STATUS.

Code
NSIM<-2999

SHBGM1 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10, data = SWANVISIT10,
               Gamma(link = "identity"))
TEST<- drop_na(SWANVISIT10, c("SHBG10", "T10", "FSH10", "E2AVE10", "TSH10", "AGE10", "DHAS10", "STATUS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$SHBG10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + STATUS10, data =  TEST_test, 
                      Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$SHBG10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

8.2 SHBGM8 = Hormones + SKELMM + AGE + STATUS.

Code
NSIM<-2999

SHBGM8 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))

TEST<- drop_na(SWANVISIT10, c("SHBG10", "T10", "FSH10", "E2AVE10", "SKELMM10", "TSH10", "AGE10", "DHAS10", "STATUS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$SHBG10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + STATUS10, data =  TEST_test, 
                      Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$SHBG10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

8.3 SHBGM13 = Hormones + FFMNHAN + AGE + STATUS.

Code
SHBGM8 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10 , data = SWANVISIT10, 
               Gamma(link = "log"))

TEST<- drop_na(SWANVISIT10, c("SHBG10", "T10", "FSH10", "E2AVE10", "FFMNHAN10", "TSH10", "AGE10", "DHAS10", "STATUS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$SHBG10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + FFMNHAN10 + STATUS10, data =  TEST_test, 
                      Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$SHBG10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

9 K-folds cross validation predictions for dependent variable: SKELMM.

9.1 SKELMM = Hormones + AGE + STATUS.

Code
SKELMMM2 <- glm(SKELMM10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
TEST<- drop_na(SWANVISIT10, c("SHBG10", "SKELMM10", "T10", "FSH10", "E2AVE10", "TSH10", "AGE10", "DHAS10", "STATUS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$SKELMM10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(SKELMM10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data =  TEST_test, 
                      Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$SKELMM10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

9.2 SKELMM = Hormones + FFMNHAN + AGE + STATUS.

Code
SKELMMM5.2 <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + E2AVE10 + TSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
TEST<- drop_na(SWANVISIT10, c("SHBG10", "SKELMM10","FFMNHAN10", "E2AVE10", "TSH10", "AGE10", "DHAS10", "STATUS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$SKELMM10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(SKELMM10 ~ SHBG10 + FFMNHAN10 + E2AVE10 + TSH10 + DHAS10 + AGE10 + STATUS10, data =  TEST_test, 
                      Gamma(link = "identity"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$SKELMM10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

10 K-folds cross validation predictions for dependent variable: FFMNHAN.

10.1 FFMNHAN = Hormones + AGE + STATUS.

Code
FFMNHANM2 <- glm(FFMNHAN10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "log"))
TEST<- drop_na(SWANVISIT10, c("SHBG10", "FFMNHAN10", "T10", "FSH10", "E2AVE10", "TSH10", "AGE10", "DHAS10", "STATUS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$FFMNHAN10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(FFMNHAN10 ~ SHBG10 + T10 + E2AVE10 + FSH10 + DHAS10 + AGE10 + TSH10 + STATUS10, data =  TEST_test, 
                      Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$FFMNHAN10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

10.2 FFMNHAN = Hormones + SKELMM + AGE + STATUS.

Code
FFMNHANM5 <- glm(FFMNHAN10 ~ SHBG10 + SKELMM10  + T10 + E2AVE10 + FSH10 + TSH10 + DHAS10 + AGE10 + STATUS10, data = SWANVISIT10, 
             Gamma(link = "identity"))
TEST<- drop_na(SWANVISIT10, c("SHBG10", "FFMNHAN10","SKELMM10", "T10", "FSH10", "E2AVE10", "TSH10", "AGE10", "DHAS10", "STATUS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$FFMNHAN10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(FFMNHAN10 ~ SHBG10 + SKELMM10  + T10 + E2AVE10 + FSH10 + TSH10 + DHAS10 + AGE10 + STATUS10, data =  TEST_test, 
                      Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$FFMNHAN10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

Code
comparaciones <- list(
  c("Hysterectomy/both ovaries removed", "Late perimenopause"),
  c("Hysterectomy/both ovaries removed", "Pre-menopausal"),
  c("Hysterectomy/both ovaries removed", "Post-menopausal"),
  c("Hysterectomy/both ovaries removed", "Early perimenopause"),
  c("Hysterectomy/both ovaries removed", "Unknown due to hormones (HT) use"),
  c("Hysterectomy/both ovaries removed", "Unknown due to hysterectomy"),
  c("Late perimenopause", "Pre-menopausal"),
  c("Late perimenopause", "Post-menopausal"),
  c("Late perimenopause", "Early perimenopause"),
  c("Late perimenopause", "Unknown due to hormones (HT) use"),
  c("Late perimenopause", "Unknown due to hysterectomy"),
  c("Pre-menopausal", "Post-menopausal"),
  c("Pre-menopausal", "Early perimenopause"),
  c("Pre-menopausal", "Unknown due to hormones (HT) use"),
  c("Pre-menopausal", "Unknown due to hysterectomy"),
  c("Post-menopausal", "Early perimenopause"),
  c("Post-menopausal", "Unknown due to hormones (HT) use"),
  c("Post-menopausal", "Unknown due to hysterectomy"),
  c("Early perimenopause", "Unknown due to hormones (HT) use"),
  c("Early perimenopause", "Unknown due to hysterectomy"),
  c("Unknown due to hormones (HT) use", "Unknown due to hysterectomy")
)

ggbarplot(SWANVISIT10 %>% drop_na(STATUS10), x = "STATUS10", y = "SHBG10",
          add = "mean_se", # Agregar media y error estándar
          color = "STATUS10", fill = "STATUS10", # Colorear según STATUS10
          palette = "jco", alpha=0.7) + # Elegir una paleta de colores
  stat_compare_means(method = "anova") + # Realizar ANOVA
  stat_compare_means(comparisons = comparaciones,
                     method = "t.test", 
                     p.adjust.method = "bonferroni") + # Ajustar con Bonferroni
  theme(axis.text.x = element_text(angle = 60, hjust = 1))# Ajustar con Bonferroni

Code
ggbarplot(SWANVISIT10 %>% drop_na(STATUS10), x = "STATUS10", y = "SKELMM10",
          add = "mean_se", # Agregar media y error estándar
          color = "STATUS10", fill = "STATUS10", # Colorear según STATUS10
          palette = "jco", alpha=0.7) + # Elegir una paleta de colores
  stat_compare_means(method = "anova") + # Realizar ANOVA
  stat_compare_means(comparisons = comparaciones,
                     method = "t.test", 
                     p.adjust.method = "bonferroni")  + # Ajustar con Bonferroni
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) # Ajustar con Bonferroni

Code
# Primero, realiza las comparaciones y guarda los resultados
# Realiza las comparaciones y guarda los resultados
comparaciones_resultados <- compare_means(
  SHBG10 ~ STATUS10, data = SWANVISIT10 %>% drop_na(STATUS10),
  comparisons = comparaciones,
  method = "t.test",
  p.adjust.method = "bonferroni"
)

# Filtra solo comparaciones significativas (p.adj < 0.05)
comparaciones_significativas <- comparaciones_resultados %>% 
  filter(p.format < 0.05) %>% 
  select(group1, group2) %>% 
  pmap(c) # Esto genera una lista de vectores de caracteres

# Ahora genera la gráfica solo con comparaciones significativas
ggbarplot(SWANVISIT10_mod %>% drop_na(STATUS), x = "STATUS", y = "SHBG",
          add = "mean_se",
          color = "STATUS", fill = "STATUS",
          palette = "jco", alpha = 0.7) +
stat_compare_means(comparisons = comparaciones_significativas,
                     method = "t.test",
                     p.adjust.method = "bonferroni") +
  stat_compare_means(method = "anova", 
                     label.y = max(SWANVISIT10_mod$SHBG, na.rm = TRUE) * 1.2, # ligeramente arriba del valor máximo
                     label.x = 0.7, # posición izquierda
                     hjust = 0) + # alineación izquierda
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Code
# Primero, realiza las comparaciones y guarda los resultados
# Realiza las comparaciones y guarda los resultados
comparaciones_resultados <- compare_means(
  SKELMM10 ~ STATUS10, data = SWANVISIT10 %>% drop_na(STATUS10),
  comparisons = comparaciones,
  method = "t.test",
  p.adjust.method = "bonferroni"
)

# Filtra solo comparaciones significativas (p.adj < 0.05)
comparaciones_significativas <- comparaciones_resultados %>% 
  filter(p.format < 0.05) %>% 
  select(group1, group2) %>% 
  pmap(c) # Esto genera una lista de vectores de caracteres

# Ahora genera la gráfica solo con comparaciones significativas
ggbarplot(SWANVISIT10_mod %>% drop_na(STATUS), x = "STATUS", y = "SKELMM",
          add = "mean_se",
          color = "STATUS", fill = "STATUS",
          palette = "jco", alpha = 0.7) +
stat_compare_means(comparisons = comparaciones_significativas,
                     method = "t.test",
                     p.adjust.method = "bonferroni") +
  stat_compare_means(method = "anova", 
                     label.y = max(SWANVISIT10_mod$SKELMM, na.rm = TRUE) * 1.2, # ligeramente arriba del valor máximo
                     label.x = 0.7, # posición izquierda
                     hjust = 0) + # alineación izquierda
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

11 Generalized linear regression models (GLM) for dependent variable: SHBG

11.1 Independent variable: Hormones + SKELMM + AGE + STATUS + OTHERS.

11.1.1 Gamma models:

  • SHBGM17: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM17.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM18: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM18.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10

11.1.2 Gaussian models:

  • SHBGM19: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM19.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM20: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM20.2 log(SHBG10) = FSH10 + E2AVE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM21: 1/SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SHBGM21.2: 1/SHBG10 = FSH10 + E2AVE10 + AGE10 + DHAS + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
Code
# Lista de columnas a modificar
variables_a_modificar <- c("STEROI110", "ARTHRT110", "OSTEOAR10", "OSTEOPR10", "EXERCIS10")

# Aplicar la transformación a todas las columnas seleccionadas
SWANVISIT10 <- SWANVISIT10 %>%
  mutate(across(all_of(variables_a_modificar), ~ case_when(
    . == "No"  ~ "No",
    . == "Yes" ~ "Yes",
    TRUE ~ NA_character_
  ))) %>%
  mutate(across(all_of(variables_a_modificar), factor)) %>%  # Reconversión a factor
  mutate(V_ACTI10 = case_when(
    V_ACTI10 %in% c("Missing", "Do not know", "Refused", "N/A") ~ NA_character_,
    TRUE ~ V_ACTI10  # Mantener el resto de valores iguales
  )) %>%
  mutate(V_ACTI10 = factor(V_ACTI10))


#Dependent variable: SHBG#######################################################
#GAMMA IDENTITY ################################################################
SHBGM17 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               Gamma(link = "identity"))

SHBGM17.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               Gamma(link = "identity"))

#GAMMA LOG #####################################################################
SHBGM18 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               Gamma(link = "log"))

SHBGM18.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10 , data = SWANVISIT10, 
               Gamma(link = "log"))

#GAMMA INVERSE NO RUN ##########################################################
# SHBGM3 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10+ STATUS10 , data = SWANVISIT10,
#               Gamma(link = "inverse"))
# SHBGM3.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10  + STATUS10 , data = SWANVISIT10,Gamma(link = "inverse"))

#GAUSSIAN IDENTITY #############################################################
SHBGM19 <- glm(SHBG10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               gaussian(link = "identity"))

SHBGM19.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               gaussian(link = "identity"))

#GAUSSIAN LOG ##################################################################
SHBGM20 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10 , data = SWANVISIT10, 
               gaussian(link = "log"))

SHBGM20.2 <- glm(SHBG10 ~ FSH10 + E2AVE10  + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10 , data = SWANVISIT10, 
               gaussian(link = "log"))

#GAUSSIAN INVERSE  #############################################################
SHBGM21 <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10,
              gaussian(link = "inverse"))
SHBGM21.2 <- glm(SHBG10 ~ FSH10 + E2AVE10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10,
              gaussian(link = "inverse"))

table1<-compare_performance(SHBGM17, SHBGM17.2, SHBGM18, SHBGM18.2, SHBGM19, SHBGM19.2, SHBGM20, SHBGM20.2, SHBGM21, SHBGM21.2, rank = TRUE, verbose = FALSE)

11.1.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
SHBGM18 glm 24.61331 0.5392872 NA 0.2255456 0.9961725 0.9959990 0.5334294 0.8790177
SHBGM18.2 24.71675 0.5393781 0.2236555 0.0038219 0.0039953 0.4665617 0.4208476
SHBGM17 24.50303 0.5421492 0.2132762 0.0000056 0.0000056 0.0000030 0.3159580
SHBGM17.2 24.53413 0.5420986 0.2122037 0.0000000 0.0000001 0.0000060 0.3055346
SHBGM20 24.25233 24.3824785 0.2151696 NA 0.0000000 0.0000000 0.2047755
SHBGM19 24.33326 24.4636614 0.2099227 0.1769864
SHBGM21 24.36186 24.4926344 0.2080646 0.1671652
SHBGM19.2 24.38626 24.5022493 0.2072728 0.1589086
SHBGM20.2 24.82601 24.9350765 0.1960184 0.0079843
SHBGM21.2 24.84908 24.9657240 0.1945232 0.0000000
Code
plot(compare_performance(SHBGM17, SHBGM17.2, SHBGM18, SHBGM18.2, SHBGM19, SHBGM19.2, SHBGM20, SHBGM20.2, SHBGM21, SHBGM21.2, rank = TRUE, verbose = FALSE))

11.1.4 Result for top 4 models: SHBGM18,SHBGM18.2, SHBGM17.2, SHBGM17

Code
stargazer(SHBGM18,SHBGM18.2, SHBGM17.2, SHBGM17,type = "html", title="Results")
Results
Dependent variable:
SHBG10
glm: Gamma glm: Gamma
link = log link = identity
(1) (2) (3) (4)
T10 -0.0001 -0.010
(0.0003) (0.013)
FSH10 0.001*** 0.001*** 0.075*** 0.073***
(0.0003) (0.0003) (0.013) (0.013)
E2AVE10 0.003*** 0.003*** 0.187*** 0.187***
(0.0004) (0.0004) (0.024) (0.024)
TSH10 -0.016*** -0.015*** -0.459*** -0.468***
(0.005) (0.005) (0.087) (0.088)
AGE10 -0.011** -0.293
(0.005) (0.224)
DHAS10 -0.001*** -0.001*** -0.038*** -0.036***
(0.0002) (0.0002) (0.006) (0.006)
SKELMM10 -0.033*** -0.032*** -1.133*** -1.146***
(0.004) (0.004) (0.170) (0.171)
PBFBIA10 -0.017*** -0.017*** -0.624*** -0.621***
(0.002) (0.002) (0.064) (0.065)
STATUS10Post-menopausal -0.011 -0.030 -0.779 0.063
(0.058) (0.057) (2.263) (2.255)
STATUS10Late perimenopause -0.125 -0.118 -5.590* -5.532*
(0.077) (0.077) (2.908) (2.905)
STATUS10Early perimenopause -0.057 -0.041 -1.404 -1.634
(0.078) (0.077) (3.199) (3.229)
STATUS10Pre-menopausal 0.040 0.064 5.627 5.011
(0.250) (0.249) (12.456) (12.468)
STATUS10Unknown due to hormones (HT) use 0.125 0.136 6.964 6.498
(0.120) (0.120) (6.145) (6.099)
STATUS10Unknown due to hysterectomy -0.147 -0.150 -3.443 -3.231
(0.098) (0.098) (3.775) (3.762)
V_ACTI10Yes, limited a little -0.052* -0.053* -2.804** -2.799**
(0.030) (0.030) (1.343) (1.341)
V_ACTI10Yes, limited a lot -0.017 -0.020 -0.662 -0.585
(0.037) (0.037) (1.554) (1.552)
EXERCIS10Yes 0.001 -0.0003 -0.011 0.140
(0.031) (0.031) (1.270) (1.271)
Constant 5.692*** 5.050*** 85.318*** 101.602***
(0.350) (0.142) (5.992) (14.581)
Observations 1,693 1,694 1,694 1,693
Log Likelihood -7,564.272 -7,571.835 -7,583.104 -7,576.363
Akaike Inf. Crit. 15,164.540 15,175.670 15,198.210 15,188.730
Note: p<0.1; p<0.05; p<0.01
Code
check_model(SHBGM18)

12 K-folds cross validation predictions for dependent variable: SHBG.

12.1 SHBGM18: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10

Code
NSIM<-2999

TEST<- drop_na(SWANVISIT10, c("SHBG10","T10", "FSH10" , "E2AVE10", "TSH10" , "AGE10" , "DHAS10", "SKELMM10" , "PBFBIA10" , "STATUS10" , "V_ACTI10" , "EXERCIS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$SHBG10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(SHBG10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = TEST_test, 
               Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$SHBG10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")

13 Generalized linear regression models (GLM) for dependent variable: SHBG

13.1 Independent variable: Hormones + SKELMM + AGE + STATUS + OTHERS.

13.1.1 Gamma models:

  • SKELMMM9: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM9.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM10: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM10.2 log(SHBG10) = FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10

13.1.2 Gaussian models:

  • SKELMMM11: SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM11.2: SHBG10 = FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM12: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM12.2 log(SHBG10) = FSH10 + E2AVE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM13: 1/SHBG10 = T10 + FSH10 + E2AVE10 + TSH10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
  • SKELMMM13.2: 1/SHBG10 = FSH10 + E2AVE10 + AGE10 + DHAS + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10
Code
#Dependent variable: SHBG#######################################################
#GAMMA IDENTITY ################################################################
SKELMMM9 <- glm(SKELMM10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               Gamma(link = "identity"))

SKELMMM9.2 <- glm(SKELMM10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               Gamma(link = "identity"))

#GAMMA LOG #####################################################################
SKELMMM10 <- glm(SKELMM10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               Gamma(link = "log"))

SKELMMM10.2 <- glm(SKELMM10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10 , data = SWANVISIT10, 
               Gamma(link = "log"))

#GAMMA INVERSE NO RUN ##########################################################
# SHBGM3 <- glm(SKELMM10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10+ STATUS10 , data = SWANVISIT10,
#               Gamma(link = "inverse"))
# SHBGM3.2 <- glm(SKELMM10 ~ FSH10 + E2AVE10 + TSH10 + DHAS10 + SKELMM10  + STATUS10 , data = SWANVISIT10,Gamma(link = "inverse"))

#GAUSSIAN IDENTITY #############################################################
SKELMMM11 <- glm(SKELMM10 ~  T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               gaussian(link = "identity"))

SKELMMM11.2 <- glm(SKELMM10 ~ FSH10 + E2AVE10 + TSH10  + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10, 
               gaussian(link = "identity"))

#GAUSSIAN LOG ##################################################################
SKELMMM12 <- glm(SKELMM10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10 , data = SWANVISIT10, 
               gaussian(link = "log"))

SKELMMM12.2 <- glm(SKELMM10 ~ FSH10 + E2AVE10  + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10 , data = SWANVISIT10, 
               gaussian(link = "log"))

#GAUSSIAN INVERSE  #############################################################
SKELMMM13 <- glm(SKELMM10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10,
              gaussian(link = "inverse"))
SKELMMM13.2 <- glm(SKELMM10 ~ FSH10 + E2AVE10 + AGE10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = SWANVISIT10,
              gaussian(link = "inverse"))

table1<-compare_performance(SKELMMM9, SKELMMM9.2, SKELMMM10, SKELMMM10.2, SKELMMM11, SKELMMM11.2, SKELMMM12, SKELMMM12.2, SKELMMM13, SKELMMM13.2, rank = TRUE, verbose = FALSE)

13.1.3 Performance models.

Code
kable(table1, caption = "Performance models comparision") %>%
  kable_styling(full_width = T, font_size = 6) %>% collapse_rows()%>%
  kable_styling(latex_options = "HOLD_position")
Performance models comparision
Name Model RMSE Sigma R2 R2_Nagelkerke AIC_wt AICc_wt BIC_wt Performance_Score
SKELMMM10 glm 2.955942 0.1448216 NA 0.1663215 0.9556699 0.9556699 0.9556520 0.9325137
SKELMMM9 2.977014 0.1451060 0.1632715 0.0443301 0.0443301 0.0443292 0.2601494
SKELMMM10.2 2.971884 0.1461721 0.1500471 0.0000001 0.0000001 0.0000188 0.2566328
SKELMMM13 2.941746 2.9575062 0.1657517 NA 0.0000000 0.0000000 0.0000000 0.2025718
SKELMMM9.2 2.983816 0.1469107 NA 0.1428842 0.1998534
SKELMMM13.2 2.943158 2.9569653 0.1628850 NA 0.1958955
SKELMMM12 2.947285 2.9630799 0.1626068 0.1758462
SKELMMM11 2.954496 2.9703286 0.1585042 0.1410570
SKELMMM12.2 2.964400 2.9774262 0.1507579 0.0934783
SKELMMM11.2 2.979973 2.9941461 0.1435099 0.0182732
Code
plot(compare_performance(SKELMMM9, SKELMMM9.2, SKELMMM10, SKELMMM10.2, SKELMMM11, SKELMMM11.2, SKELMMM12, SKELMMM12.2, SKELMMM13, SKELMMM13.2, rank = TRUE, verbose = FALSE))

13.1.4 Result for top 4 models: SKELMMM10,SKELMMM9, SKELMMM10.2, SKELMMM13

Code
stargazer(SKELMMM10,SKELMMM9, SKELMMM10.2, SKELMMM13,type = "html", title="Results")
Results
Dependent variable:
SKELMM10
glm: Gamma glm: Gamma glm: Gamma glm: gaussian
link = log link = identity link = log link = inverse
(1) (2) (3) (4)
T10 0.0003*** 0.014*** -0.00001***
(0.0001) (0.003) (0.00000)
FSH10 -0.001*** -0.012*** -0.001*** 0.00004***
(0.0001) (0.001) (0.0001) (0.00000)
E2AVE10 -0.00001 0.0003 0.0001 0.00000
(0.0001) (0.002) (0.0001) (0.00000)
TSH10 -0.001 -0.009 -0.0005 0.0001
(0.001) (0.027) (0.001) (0.0001)
AGE10 -0.006*** -0.121*** 0.0003***
(0.001) (0.029) (0.0001)
DHAS10 -0.0001*** -0.003*** -0.0001* 0.00001**
(0.00005) (0.001) (0.00005) (0.00000)
SHBG10 -0.001*** -0.020*** -0.001*** 0.0001***
(0.0001) (0.003) (0.0001) (0.00001)
PBFBIA10 -0.0004 -0.006 -0.0001 0.00002
(0.0005) (0.009) (0.0005) (0.00002)
STATUS10Post-menopausal -0.046*** -0.989*** -0.049*** 0.002***
(0.015) (0.327) (0.015) (0.001)
STATUS10Late perimenopause -0.009 -0.188 0.001 0.001
(0.021) (0.441) (0.021) (0.001)
STATUS10Early perimenopause -0.059*** -1.096** -0.046** 0.003***
(0.021) (0.439) (0.021) (0.001)
STATUS10Pre-menopausal -0.191*** -3.527*** -0.183*** 0.010***
(0.067) (1.260) (0.067) (0.004)
STATUS10Unknown due to hormones (HT) use -0.031 -0.551 -0.022 0.002
(0.032) (0.674) (0.032) (0.002)
STATUS10Unknown due to hysterectomy -0.015 -0.287 -0.012 0.001
(0.026) (0.558) (0.026) (0.001)
V_ACTI10Yes, limited a little -0.003 -0.041 -0.003 0.0002
(0.008) (0.164) (0.008) (0.0004)
V_ACTI10Yes, limited a lot 0.031*** 0.682*** 0.030*** -0.001***
(0.010) (0.201) (0.010) (0.0005)
EXERCIS10Yes 0.015* 0.328* 0.014 -0.001
(0.008) (0.171) (0.009) (0.0004)
Constant 3.522*** 29.930*** 3.177*** 0.023***
(0.089) (1.794) (0.030) (0.004)
Observations 1,693 1,693 1,694 1,693
Log Likelihood -4,173.585 -4,176.656 -4,191.851 -4,230.015
Akaike Inf. Crit. 8,383.171 8,389.312 8,415.702 8,496.031
Note: p<0.1; p<0.05; p<0.01
Code
check_model(SKELMMM10)

14 K-folds cross validation predictions for dependent variable: SHBG.

14.1 SHBGM18: log(SHBG10) = T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SKELMM10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10

Code
NSIM<-2999

TEST<- drop_na(SWANVISIT10, c("SHBG10","T10", "FSH10" , "E2AVE10", "TSH10" , "AGE10" , "DHAS10", "SKELMM10" , "PBFBIA10" , "STATUS10" , "V_ACTI10" , "EXERCIS10"))

folds<-c()
fold_cv<-c() 
table2<-data.frame(matrix(nrow = NSIM, ncol = 2)) 
colnames(table2) = c("MEAN","SD")
for (i in 1:NSIM) {
  folds[[i]] <- createFolds(TEST$SKELMM10, k=5) 
  fold_cv[[i]] <- lapply(folds[[i]], function(x){
    TEST_train <- TEST[-x, ]
    TEST_test <- TEST[x, ]
    TEST_model <- glm(SKELMM10 ~ T10 + FSH10 + E2AVE10 + TSH10 + AGE10 + DHAS10 + SHBG10 + PBFBIA10 + STATUS10 + V_ACTI10 + EXERCIS10, data = TEST_test, 
               Gamma(link = "log"))
    pred_results <- TEST_model$fitted.values
    pred_cor <- cor(TEST_test$SKELMM10, pred_results)
    return(pred_cor)
  })
  table2[i,1]<- mean(unlist(fold_cv[[i]]))
  table2[i,2]<- sd(unlist(fold_cv[[i]]))
}

table2$CIlwr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 0.05 / 2) * table2$SD / sqrt(4)
table2$CIupr<- table2$MEAN + quantile(distributions3::StudentsT(df = 4), 1 - 0.05/ 2) * table2$SD / sqrt(4)

table2 %>% 
  ggplot(aes(x = 1:NSIM, y = MEAN)) + 
  geom_ribbon(aes(ymin = CIlwr, ymax = CIupr), alpha = 0.3, fill = "red")+
  geom_line(colour = "blue",alpha = 0.5) + 
  theme_minimal()+
  xlab("Simulation")+
  ylab("Mean of k-folds CV, Correlation(Fitted Values-Actual Values). CI 95%")