# Improved simulation with actual treatment effects
# Simulate the patient-level ("main") dataset: one row per patient with a
# treatment flag, Weibull recovery and death times, censoring indicators and
# a baseline ordinal score.
#
# Arguments:
#   n              Number of patients; a single non-negative number.
#   max_follow     End of follow-up; simulated times are capped at this value.
#   trt_prob       Probability that a patient is assigned Treatment == 1.
#   rec_shape, rec_scale
#                  Weibull shape and (untreated) scale for time to recovery.
#   death_shape, death_scale
#                  Weibull shape and (untreated) scale for time to death.
#   rec_benefit    Treated patients' recovery scale is divided by this.
#   death_benefit  Treated patients' death scale is multiplied by this.
#   censor_rate    Probability that a patient is additionally flagged as
#                  censored for both outcomes.
#   baseline_min, baseline_max
#                  Endpoints of the integer sequence BaselineScore is drawn
#                  from (uniformly, with replacement).
#
# Returns a data.frame with columns ID, TimeToRecovery, TimeToDeath,
# RecoveryCensoringIndicator, DeathCensoringIndicator, BaselineScore and
# Treatment. Censoring indicators are 1 when the time reached max_follow or
# the patient was picked by the extra random censoring, 0 otherwise.
simulate_main <- function(
    n = 100,
    max_follow = 30,
    trt_prob = 0.5,
    rec_shape = 2, rec_scale = 12,
    death_shape = 1.5, death_scale = 25,
    rec_benefit = 1.5,    # Treatment speeds recovery
    death_benefit = 1.8,  # Treatment delays death
    censor_rate = 0.15,
    baseline_min = 4, baseline_max = 7
){
  stopifnot(is.numeric(n), length(n) == 1L, is.finite(n), n >= 0)

  Treatment <- rbinom(n, 1, trt_prob)
  is_treated <- Treatment == 1

  # A smaller recovery scale means earlier recovery; a larger death scale
  # means later death.
  rec_scale_adj <- ifelse(is_treated, rec_scale / rec_benefit, rec_scale)
  death_scale_adj <- ifelse(is_treated, death_scale * death_benefit, death_scale)

  T_rec <- numeric(n)
  T_death <- numeric(n)

  # The two draws stay interleaved per patient so the random-number stream
  # (and therefore any seeded result) is unchanged; seq_len() makes the loop
  # a no-op for n == 0, where 1:n would iterate over c(1, 0).
  for (i in seq_len(n)) {
    T_rec[i] <- rweibull(1, rec_shape, rec_scale_adj[i])
    T_death[i] <- rweibull(1, death_shape, death_scale_adj[i])
  }

  # Cap at the end of follow-up; capped times are flagged as censored below.
  T_rec <- pmin(T_rec, max_follow)
  T_death <- pmin(T_death, max_follow)

  # Recorded times are rounded to the nearest whole number.
  TimeToRecovery <- round(T_rec)
  TimeToDeath <- round(T_death)

  RecCens <- as.integer(T_rec >= max_follow)
  DthCens <- as.integer(T_death >= max_follow)

  # Extra random censoring only sets the indicators; the recorded times are
  # left as drawn.
  extra_cens <- rbinom(n, 1, censor_rate)
  RecCens[extra_cens == 1] <- 1
  DthCens[extra_cens == 1] <- 1

  # Sample positions rather than values: sample(x, ...) treats a length-one
  # numeric x as 1:x, which would be wrong when baseline_min == baseline_max.
  baseline_levels <- baseline_min:baseline_max
  BaselineScore <- baseline_levels[
    sample.int(length(baseline_levels), n, replace = TRUE)
  ]

  data.frame(
    ID = sprintf("Patient_%03d", seq_len(n)),
    TimeToRecovery = TimeToRecovery,
    TimeToDeath = TimeToDeath,
    RecoveryCensoringIndicator = RecCens,
    DeathCensoringIndicator = DthCens,
    BaselineScore = BaselineScore,
    Treatment = Treatment,
    stringsAsFactors = FALSE
  )
}

# Simulate repeated ordinal scores for every patient in `main_df`.
#
# Each patient starts at their BaselineScore on RelativeDay 0. At every
# scheduled visit day that does not exceed the patient's follow-up time the
# score moves by +1, 0 or -1 (drawn with the arm-specific probabilities) and
# is kept within [ord_min, ord_max].
#
# Arguments:
#   main_df         data.frame with columns ID, BaselineScore, Treatment,
#                   TimeToRecovery and TimeToDeath (one row per patient).
#   visit_days      Scheduled visit days.
#   ord_min, ord_max
#                   Lower and upper bounds applied to the score after each step.
#   p_improve_ctrl, p_worsen_ctrl
#                   Probabilities of a +1 / -1 step when Treatment is not 1.
#   p_improve_trt, p_worsen_trt
#                   Probabilities of a +1 / -1 step when Treatment == 1.
#
# Returns a data.frame with columns PersonID, OrdinalScore and RelativeDay,
# one row per patient and visit (including day 0). An empty `main_df` gives a
# zero-row data.frame with those columns.
#
# Stops with an error if, for either arm, p_improve + p_worsen exceeds 1.
simulate_longitudinal <- function(
    main_df,
    visit_days = c(1, 3, 5, 7, 10, 14, 18, 21, 25, 28),
    ord_min = 1, ord_max = 8,
    p_improve_ctrl = 0.3,
    p_improve_trt = 0.45,
    p_worsen_ctrl = 0.25,
    p_worsen_trt = 0.15
){
  # Probabilities for the steps c(1, 0, -1) of one arm. The remainder
  # 1 - p_improve - p_worsen can come out marginally below zero purely from
  # floating-point rounding (e.g. 0.9 and 0.1), which sample() rejects as a
  # negative probability, so rounding-level negatives are clamped to 0.
  step_probs <- function(p_improve, p_worsen, arm) {
    p_stable <- 1 - p_improve - p_worsen
    if (p_stable < 0) {
      if (p_stable < -sqrt(.Machine$double.eps)) {
        stop(
          "p_improve + p_worsen exceeds 1 for the ", arm, " arm",
          call. = FALSE
        )
      }
      p_stable <- 0
    }
    c(p_improve, p_stable, p_worsen)
  }

  probs_ctrl <- step_probs(p_improve_ctrl, p_worsen_ctrl, "control")
  probs_trt <- step_probs(p_improve_trt, p_worsen_trt, "treatment")

  # With no patients, do.call(rbind, list()) would return NULL; hand back a
  # typed zero-row frame instead so callers always get a data.frame.
  if (nrow(main_df) == 0L) {
    return(data.frame(
      PersonID = character(0),
      OrdinalScore = numeric(0),
      RelativeDay = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  long_list <- lapply(seq_len(nrow(main_df)), function(i) {
    this_id <- main_df$ID[i]
    base_score <- main_df$BaselineScore[i]
    is_treatment <- main_df$Treatment[i]

    # NOTE(review): visits run until the LATER of the two times, so visits
    # can be generated after the simulated death time - confirm intended.
    max_time <- max(main_df$TimeToRecovery[i], main_df$TimeToDeath[i])
    days <- visit_days[visit_days <= max_time]

    # No scheduled visit within follow-up: baseline row only.
    if (length(days) == 0) {
      return(data.frame(
        PersonID = this_id,
        OrdinalScore = base_score,
        RelativeDay = 0,
        stringsAsFactors = FALSE
      ))
    }

    probs <- if (is_treatment == 1) probs_trt else probs_ctrl

    scores <- numeric(length(days) + 1)
    scores[1] <- base_score

    # One draw per visit, in visit order, so seeded runs reproduce the same
    # trajectories; each step is clamped to the ordinal scale.
    for (k in seq_along(days) + 1L) {
      step <- sample(c(1, 0, -1), 1, prob = probs)
      scores[k] <- max(ord_min, min(ord_max, scores[k - 1] + step))
    }

    data.frame(
      PersonID = this_id,
      OrdinalScore = scores,
      RelativeDay = c(0, days),
      stringsAsFactors = FALSE
    )
  })

  do.call(rbind, long_list)
}

# ---- Demo run ----
# Fixed seed so the generated datasets are reproducible; the longitudinal
# data are simulated from the patient-level table produced just before.
set.seed(2025)
main_df <- simulate_main(n = 150)
long_df <- simulate_longitudinal(main_df)

# Preview of the patient-level table.
cat("=== Main Dataset ===\n")
print(head(main_df, n = 6))

# Five-number-style summaries of both event times, split by treatment arm.
cat("\n=== Recovery/Death Times by Treatment ===\n")
invisible(lapply(
  c("TimeToRecovery", "TimeToDeath"),
  function(outcome) {
    print(with(main_df, tapply(main_df[[outcome]], Treatment, summary)))
  }
))

# Preview of the visit-level table.
cat("\n=== Longitudinal Dataset ===\n")
print(head(long_df, n = 6))

# Persist both tables to the working directory without row names.
invisible(Map(
  function(tbl, path) write.csv(tbl, file = path, row.names = FALSE),
  list(main_df, long_df),
  c("main_df.csv", "long_df.csv")
))

