From 573394c6570c74467fee2b042dc52ba34e65e3dd Mon Sep 17 00:00:00 2001
From: Youngrok Lee <youngrok.lee.dev@gmail.com>
Date: Wed, 13 May 2026 23:22:49 -0400
Subject: [PATCH 1/2] config Air for code formatting

---
 .vscode/extensions.json |  5 +++++
 .vscode/settings.json   | 10 ++++++++++
 air.toml                |  0
 3 files changed, 15 insertions(+)
 create mode 100644 .vscode/extensions.json
 create mode 100644 .vscode/settings.json
 create mode 100644 air.toml

diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 0000000..344f76e
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,5 @@
+{
+    "recommendations": [
+        "Posit.air-vscode"
+    ]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..700f1ee
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,10 @@
+{
+  "[r]": {
+    "editor.formatOnSave": true,
+    "editor.defaultFormatter": "Posit.air-vscode"
+  },
+  "[quarto]": {
+    "editor.formatOnSave": false,
+    "editor.defaultFormatter": "quarto.quarto"
+  }
+}
\ No newline at end of file
diff --git a/air.toml b/air.toml
new file mode 100644
index 0000000..e69de29

From d460cbd2fd6c0d618cbc52dac5484c72527259a2 Mon Sep 17 00:00:00 2001
From: Youngrok Lee <youngrok.lee.dev@gmail.com>
Date: Mon, 18 May 2026 22:26:45 -0400
Subject: [PATCH 2/2] add roxygen comments for function documents

---
 experimental/example-refactored.R | 246 ++++++++++++++++++++++++++++--
 1 file changed, 233 insertions(+), 13 deletions(-)

diff --git a/experimental/example-refactored.R b/experimental/example-refactored.R
index 8a14408..34490d8 100644
--- a/experimental/example-refactored.R
+++ b/experimental/example-refactored.R
@@ -16,6 +16,26 @@ tab <- read.csv(
 # Step: Cloning
 #########################
 
+#' Duplicate data frame for each treatment arm to emulate
+#'
+#' @param data Input data frame that contains all observations of interest.
+#'   Each row represents an observation, and columns include
+#'   observation identifiers, binary treatment variable (0/1),
+#'   time to treatment (continuous), binary outcome variable (0/1),
+#'   observed followup time (continuous), and covariates.
+#' @param arms Character vector in which each element is the name of an arm.
+#'
+#' @returns A list of data frames.
+#'   Each element of the list is associated with one arm.
+#'
+#' @export
+#' @examples
+#' tab <- read.csv(
+#'   "example_data/ije-2019-08-1035-File008.csv",
+#'   sep = ",",
+#'   header = TRUE
+#' )
+#' clones <- clone_arms(tab, c("Control", "Surgery"))
 clone_arms <- function(data, arms) {
   n <- length(arms)
   if (n <= 1) {
@@ -63,7 +83,21 @@ apply_policy_old <- function(
   res
 }
 
-# apply case_when logics (e.g. policy, censoring) to clones
+#' Apply case_when logics (e.g. policy, censoring) to clones
+#'
+#' @param clones A list of data frames. Each element of the list represents
+#'   one treatment arm.
+#' @param logics A nested list. Each element of the outer list represents one
+#'   treatment arm. Each element of the inner list represents one new variable
+#'   to be created by applying logics. Each element of the inner list contains a
+#'   character vector that represents a sequence of logics to be passed into
+#'   a `case_when()` call to determine the value of the new variable.
+#'
+#' @returns A list of data frames, each of which includes the new variables
+#'   created by the provided logics.
+#'
+#' @export
+#' @examples
 apply_logics <- function(
   clones,
   logics
@@ -96,8 +130,52 @@ apply_logics <- function(
 }
 
 
-# create clone policy for scenario A
-# return as a nested list
+#' Generate clone policy for scenario A
+#'
+#' @param arms A character vector of length 2. The first element represents
+#'   a name of the untreated arm, and the second element represents a name of
+#'   the treated arm.
+#' @param treatment A name of variable that represents whether each observation
+#'   was treated or not in observational data. The treatment variable should
+#'   exist in the data frame to which the return value of this function will
+#'   be applied, and the treatment variable value in the data frame should be
+#'   either 0 or 1, i.e. binary treatment.
+#' @param time_to_treatment A name of variable that represents time to
+#'   treatment in the observational data. The time-to-treatment variable should
+#'   exist in the data frame to which the return value of this function will
+#'   be applied, and the value in the data frame should be either a numeric
+#'   value or NA if the observation was untreated in the observational data.
+#' @param grace_period A numeric value to represent grace period of treatment.
+#'   Treatment policy is assumed to be "provide treatment within the grace
+#'   period."
+#' @param outcome A name of variable that represents outcome in the
+#'   observational data. The outcome variable should exist in the data frame to
+#'   which the return value of this function will be applied, and the outcome
+#'   variable value in the data frame should be either 0 or 1, i.e. binary
+#'   outcome.
+#' @param followup A name of variable that represents follow up time in the
+#'   observational data. The follow up time variable should exist in the data
+#'   frame to which the return value of this function will be applied, and the
+#'   follow up time variable value in the data frame should be numeric.
+#' @param clone_outcome A name of variable to be newly created to represent
+#'   emulated outcome in cloned data frame. The new variable name should not
+#'   already exist in the data frame to which the return value of this function
+#'   will be applied, to avoid accidental overwriting.
+#' @param clone_followup A name of variable to be newly created to represent
+#'   emulated follow up time in cloned data frame. The new variable name should
+#'   not already exist in the data frame to which the return value of this
+#'   function will be applied, to avoid accidental overwriting.
+#'
+#' @returns A nested list. The first element of the outer list represents
+#'   untreated arm, while the second element of the outer list represents
+#'   treated arm. For each element of outer list, the first element of the inner
+#'   list represents emulated outcome, and the second element of the inner list
+#'   represents emulated follow up time. Each element of the inner list
+#'   represents a sequence of logics to be passed into `case_when()` when
+#'   creating new variables for emulated outcome and follow up time.
+#'
+#' @export
+#' @examples
 create_policy_A <- function(
   arms,
   treatment,
@@ -239,8 +317,54 @@ censor_arms_old <- function(
   res
 }
 
-# create censoring logic for scenario A
-# return as a nested list
+#' Generate censoring logic for scenario A
+#'
+#' @param arms A character vector of length 2. The first element represents
+#'   a name of the untreated arm, and the second element represents a name of
+#'   the treated arm.
+#' @param treatment A name of variable that represents whether each observation
+#'   was treated or not in observational data. The treatment variable should
+#'   exist in the data frame to which the return value of this function will
+#'   be applied, and the treatment variable value in the data frame should be
+#'   either 0 or 1, i.e. binary treatment.
+#' @param time_to_treatment A name of variable that represents time to
+#'   treatment in the observational data. The time-to-treatment variable should
+#'   exist in the data frame to which the return value of this function will
+#'   be applied, and the value in the data frame should be either a numeric
+#'   value or NA if the observation was untreated in the observational data.
+#' @param grace_period A numeric value to represent grace period of treatment.
+#'   Treatment policy is assumed to be "provide treatment within the grace
+#'   period."
+#' @param followup A name of variable that represents follow up time in the
+#'   observational data. The follow up time variable should exist in the data
+#'   frame to which the return value of this function will be applied, and the
+#'   follow up time variable value in the data frame should be numeric.
+#' @param clone_censoring A name of binary indicator variable to be newly
+#'   created to represent whether the observation violates arm's policy or not.
+#'   The new variable name should not already exist in the data frame to which
+#'   the return value of this function will be applied, to avoid accidental
+#'   overwriting.
+#' @param clone_uncensored_followup A name of variable to be newly created to
+#'   represent the earliest time that the value of the emulated censoring binary
+#'   indicator (i.e. variable to be named according to `clone_censoring`
+#'   argument) can be determined for each observation. The new variable
+#'   name should not already exist in the data frame to which the return value
+#'   of this function will be applied, to avoid accidental overwriting.
+#'
+#' @returns A nested list. The first element of the outer list represents
+#'   untreated arm, while the second element of the outer list represents
+#'   treated arm. For each element of outer list, the first element of the inner
+#'   list represents emulated censoring binary indicator (0/1) that represents
+#'   whether the observation violated the arm's policy or not within the grace
+#'   period. The second element of the inner list represents
+#'   the earliest time that the value of the emulated censoring binary
+#'   indicator can be determined for each observation. Each element of the
+#'   inner list represents a sequence of logics to be passed into `case_when()`
+#'   when creating new variables for emulated censoring indicator and censoring
+#'   time.
+#'
+#' @export
+#' @examples
 create_censoring_logics_A <- function(
   arms,
   treatment,
@@ -376,7 +500,22 @@ create_timestamp_table_old <- function(clones) {
   res
 }
 
-# create timestamp table
+#' Create timestamp table
+#'
+#' @param clones A list of data frames. Each element of the list represents
+#'   one treatment arm. This version of clones must contain a column that
+#'   represents an emulated follow up time.
+#' @param clone_followup A column name in the emulated clones (i.e. `clones`)
+#'   that represents the emulated follow up time in cloned data frame.
+#'
+#' @returns A data frame with two columns: `tevent` and `ID_t`.
+#'   `tevent` represents a timestamp at which an outcome event can occur based
+#'   on observed data. `ID_t` represents an enumerated identifier of each
+#'   timestamp, from 1 to n where n represents the number of unique `tevent`
+#'   values.
+#'
+#' @export
+#' @examples
 create_timestamp_table <- function(clones, clone_followup) {
   timestamps <- sapply(clones, `[[`, i = clone_followup, simplify = FALSE)
   t_events <- sort(unique(unlist(timestamps)))
@@ -407,14 +546,39 @@ split_at_timestamp_old <- function(clones, t_events, event) {
   res
 }
 
-# split data at each time event
+#' Split each observation into multiple subrecords at each time cut
+#'
+#' @param clones A list of data frames. Each element of the list represents
+#'   one treatment arm. This version of clones must contain a column that
+#'   represents an emulated follow up time (corresponding to `clone_followup`
+#'   argument) and an event of interest (corresponding to `event` argument).
+#' @param clone_followup A column name in the emulated clones (i.e. `clones`)
+#'   that represents the emulated follow up time in cloned data frame.
+#' @param t_events A vector of timestamps at which an outcome event can occur
+#'   based on observed data.
+#' @param event A variable name of an event of interest. The variable should
+#'   exist in each data frame that is an element of `clones` argument, and
+#'   the variable value should be a binary (0 or 1).
+#' @param timestamp_start A new variable name to denote start time.
+#' @param id A new variable name for a unique observation identifier, to
+#'   represent that multiple rows in the output data frame are associated with
+#'   the same observation.
+#'
+#' @returns A list of long-form data frames. Each data frame represents each
+#'   clone arm. Each row of the long-form data frame represents a subrecord of
+#'   each observation associated with each specific time interval. The first
+#'   subrecord starts with time 0, and the rows are expanded up to
+#'   `clone_followup`, where cut times are determined by `t_events` argument.
+#'
+#' @export
+#' @examples
 split_at_timestamp <- function(
   clones,
   clone_followup,
   t_events,
   event,
   timestamp_start = "Tstart",
-  timestamp_id = "ID"
+  id = "ID"
 ) {
   arms <- names(clones)
 
@@ -428,13 +592,33 @@ split_at_timestamp <- function(
         end = clone_followup,
         start = timestamp_start,
         event = event,
-        id = timestamp_id
+        id = id
       )
   }
 
   res
 }
 
+df_timestamp <- create_timestamp_table(clones_censored, "fup")
+
+clones_splitted_by_outcome <- split_at_timestamp(
+  clones_censored,
+  "fup",
+  df_timestamp$tevent,
+  "outcome",
+  "Tstart",
+  "ID"
+)
+
+clones_splitted_by_censoring <- split_at_timestamp(
+  clones_censored,
+  "fup",
+  df_timestamp$tevent,
+  "censoring",
+  "Tstart",
+  "ID"
+)
+
 
 # Function to create training data for censoring probability estimation
 # TO DO: Eliminate dependency on {purrr}
@@ -482,7 +666,43 @@ create_final_data_old <- function(clones) {
   res
 }
 
-# Function to create training data for censoring probability estimation
+#' Create training data for censoring probability estimation
+#'
+#' @param clones A list of data frames. Each element of the list represents
+#'   one treatment arm. This version of clones must contain a column that
+#'   represents an emulated follow up time (corresponding to `clone_followup`
+#'   argument), an emulated outcome (corresponding to `clone_outcome`), and a
+#'   binary indicator variable that represents whether the observation violates
+#'   arm's policy or not (corresponding to `clone_censoring` argument).
+#' @param clone_followup A column name that represents the emulated follow up
+#'   time in each arm of clones. The variable should exist in each element
+#'   data frame of `clones` argument.
+#' @param clone_outcome A column name that represents the emulated outcome
+#'   in each arm of clones. The variable should exist in each element
+#'   data frame of `clones` argument, and the variable value should be binary
+#'   (0 or 1).
+#' @param clone_censoring A column name that represents whether the observation
+#'   violates arm's policy or not. The variable should exist in each element
+#'   data frame of `clones` argument, and the variable value should be binary
+#'   (0 or 1).
+#' @param col_ids A vector of column names whose combination of values
+#'   uniquely identifies each observation.
+#' @param timestamp_start A new variable name to denote start time of each
+#'   subrecord of observations in a long-form data.
+#' @param id A new variable name for a unique observation identifier, to
+#'   represent that multiple rows in the output data frame are associated with
+#'   the same observation.
+#' @param timestamp_stop A new variable name to denote end time of each
+#'   subrecord of observations in a long-form data.
+#'
+#' @returns A list of long-form data frames. Each data frame represents each
+#'   clone arm. Each row of the long-form data frame represents a subrecord of
+#'   each observation associated with each specific time interval. The first
+#'   subrecord starts with time 0, and the rows are expanded up to
+#'   `clone_followup`, where cut times are the unique `clone_followup` values.
+#'
+#' @export
+#' @examples
 create_final_data <- function(
   clones,
   clone_followup,
@@ -490,7 +710,7 @@ create_final_data <- function(
   clone_censoring,
   col_ids,
   timestamp_start = "Tstart",
-  timestamp_id = "ID",
+  id = "ID",
   timestamp_stop = "Tstop"
 ) {
   df_timestamp <- create_timestamp_table(clones, clone_followup)
@@ -501,7 +721,7 @@ create_final_data <- function(
     df_timestamp$tevent,
     clone_outcome,
     timestamp_start,
-    timestamp_id
+    id
   )
 
   clones_splitted_by_censoring <- split_at_timestamp(
@@ -510,7 +730,7 @@ create_final_data <- function(
     df_timestamp$tevent,
     clone_censoring,
     timestamp_start,
-    timestamp_id
+    id
   )
 
   # merge two tables and create column to represent end of timestamp