Citation for this R Markdown:

Online Supplement 1 Appendix for the article published in The High School Journal:

Bowers, A.J., Zhao, Y., Ho, E. (2023) Towards Hierarchical Cluster Analysis Heatmaps as Visual Data Analysis of Entire Student Cohort Longitudinal Trajectories and Outcomes from Grade 9 through College. The High School Journal.

Note: Please see the full paper for details. This Online Supplement 1 Appendix to the paper provides the code for the Shiny R app detailed in the full paper.

Contact information: Email: Website: https://www.tc.columbia.edu/faculty/ab3764/

Prerequisites

The R Shiny application can be accessed and used here: https://ohrice.shinyapps.io/Heatmap

This document details the files and directories required to run the Shiny application on your own computer (without using the aforementioned link).

Please note the packages required in the beginning of the server.R file. One important package that must be installed first is the BiocManager package. Please see https://www.bioconductor.org/install/ for more details. Once installed, you will need to install the following packages like so:

BiocManager::install(c("ComplexHeatmap", "hopach"))

server.R

This file uses user input to create the heatmap.

#This sets the max possible file size.
options(shiny.maxRequestSize=90*1024^2) 

library(shiny)

#Run
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("ComplexHeatmap")

#then run 
# options(repos = BiocManager::repositories())

library(BiocManager)
options(repos = BiocManager::repositories())

#Run
#BiocManager::install(c("ComplexHeatmap", "hopach"))

library(ComplexHeatmap)
library(hopach)
library(circlize)
library(MASS)
library(pvclust)
library(foreign)
library(tools)
library(Rcpp)
library(lattice)
library(openxlsx)

# This sets up the demo heatmap. Only needs to be done once. 

example = read.csv("data/example.csv", check.names = FALSE)
annocol = grep("[*]", names(example))
heatexample = example[,-annocol]
annotations = example[,annocol]
rownames(heatexample) = example[,1]
heatexample = heatexample[,-1]
heatexample = scale(heatexample)


shinyServer(function(input, output, session) {
   
  #creates preview of demo matrix
  output$headdemo <- renderTable({
    
    head(example)
  })
  
  #creates heatmap of demo matrix
  output$demo <- renderPlot({
    
    
    baseheat = Heatmap(heatexample,heatmap_legend_param = list(title = "legend"), column_title = "Example")
    ht.gender = Heatmap(example[,4],name="Gender",clustering_distance_rows =function(m) as.dist(as.matrix(distancematrix(m, d="cosangle"))),clustering_method_rows = "average",na_col = "white",col = colorRamp2(c(0, 1), c("white", "black")),heatmap_legend_param = list(at = c(0, 1)),width = unit(5, "mm"))
    ht.lunch = Heatmap(example[,5],name="Free/Reduced Lunch",clustering_distance_rows =function(m) as.dist(as.matrix(distancematrix(m, d="cosangle"))),clustering_method_rows = "average",na_col = "white",col = colorRamp2(c(0, 1), c("white", "black")),heatmap_legend_param = list(at = c(0, 1)),width = unit(5, "mm"))
    ht.dropout = Heatmap(example[,8],name="Dropout",clustering_distance_rows =function(m) as.dist(as.matrix(distancematrix(m, d="cosangle"))),clustering_method_rows = "average",na_col = "white",col = colorRamp2(c(0, 1), c("white", "black")),heatmap_legend_param = list(at = c(0, 1)),width = unit(5, "mm"))
    
    final = baseheat + ht.gender + ht.lunch + ht.dropout
    
    final
     })
  
  #creates head of uploaded data
  output$head <- renderTable({
    inFile = input$file1
    
    if (is.null(inFile))
      return(NULL)
    
    if(identical(file_ext(inFile$name), "csv"))
       dataset = read.csv(inFile$datapath, check.names = FALSE)
    else if(identical(file_ext(inFile$name), "xlsx"))
      dataset = read.xlsx(inFile$datapath,1, check.names = FALSE)
    else
      stop('You did not upload a valid csv or xlsx file. Please try again.')
    
    dataset[1:min(5, nrow(dataset)),1:min(5, ncol(dataset))]
  })
  
  #change row name font size
  
  output$rowFont <- renderUI({
    sliderInput('rowFontValue', 'Row Font Size', min = 0, max = 20, value = 10, step = 1)
    
    
  })
  
  #change column name font size
  
  output$columnFont <- renderUI({
    sliderInput('columnFontValue', 'Column Font Size', min = 0, max = 20, value = 10, step = 1)
  })
  
  #turn on or off clustering
  
  output$clustering <- renderUI({
    checkboxGroupInput('cluster', 'Clustering',c("Rows", "Columns"))
  })
  
  #creates variables checkboxes of uploaded data
  
  output$radio <- renderUI({
    inFile = input$file1
    
    if (is.null(inFile))
      return(NULL)
    
    if(identical(file_ext(inFile$name), "csv"))
      dataset = read.csv(inFile$datapath, check.names = FALSE, na.strings = " ")
    else if (identical(file_ext(inFile$name), "xlsx"))
      dataset = read.xlsx(inFile$datapath, 1, check.names = FALSE, na.strings = " ")
    else
      dataset = NULL
      
    annocol = grep("[*]", names(dataset))
    if(length(annocol)!=0){
      annocolnames = colnames(dataset)[annocol]
    }
    else{
      annocolnames = NULL
    }
    
    checkboxGroupInput('vars','Annotation Variables',annocolnames)
  })
  
  #creates heatmap of uploaded data
  output$heatmap <- renderPlot({
    
    inFile = input$file1
    
    if (is.null(inFile))
      return(NULL)
    
    
    if(identical(file_ext(inFile$name), "csv")){
        #check.names preserves the asterisk character, used to denote column for annotation
 
        dataset = read.csv(inFile$datapath, check.names = FALSE, na.strings = " ")
    }
    
    if(identical(file_ext(inFile$name), "xlsx")){
      
        dataset = read.xlsx(inFile$datapath,1, check.names = FALSE, na.strings = " ")
    }
               
    validate(
        need(!any(duplicated(dataset[,1])), "You have duplicate row names. Please fix your file and try again."),
        need(!anyNA(dataset), "You have NA values in your dataset. Please fix your file and try again."),
        need(all(sapply(dataset[,-1], is.numeric)), "Your data needs to be numeric. Please fix your file and try again.")
    )
        
    heatmaptitle = inFile$name
        
    #indices of columns that have asterisks/will be annotations
    annocol = grep("[*]", names(dataset))
        
    if(length(annocol) !=0){
        #heatdata contains data for heatmap without annotations
        heatdata = dataset[,-grep("[*]",names(dataset))]
    }
    else{
        heatdata = dataset
    }
        
    #set row names
    rownames(heatdata) = dataset[,1]
        
    #we don't need to graph the ID. Also, scale
    heatdata = heatdata[,-1, drop = FALSE]
    heatdata = scale(heatdata)
        
    #By default, heatmap has no annotations
    heatmap = Heatmap(heatdata, 
                      heatmap_legend_param = list(title = "legend"),
                      column_title = heatmaptitle,
                      cluster_rows = ("Rows" %in% input$cluster),
                      cluster_columns = ("Columns" %in% input$cluster),
                      row_names_gp = gpar(fontsize = input$rowFontValue),
                      column_names_gp = gpar(fontsize = input$columnFontValue))
        
    #If user interacts with the check boxes, annotations may be shown
    if(!is.null(annocol)){
       if(!is.null(input$vars)){
          vec = unlist(input$vars)
          annodata = dataset[,vec,drop=FALSE]
          for(i in 1:length(vec)){
            annotation = Heatmap(annodata[,i],name=names(annodata)[i],
                                 clustering_distance_rows =function(m) as.dist(as.matrix(distancematrix(m, d="cosangle"))),
                                 clustering_method_rows = "average",
                                 na_col = "white",col = colorRamp2(c(0, 1), c("white", "black")),
                                 heatmap_legend_param = list(at = c(0, 1)),width = unit(5, "mm"))
            heatmap = heatmap + annotation
          }
       }
    } 
        
        
    heatmap
   
  }, 
    height = function(){
    max(session$clientData$output_heatmap_height, session$clientData$output_heatmap_width)
  },
    width = function(){
      max(session$clientData$output_heatmap_height, session$clientData$output_heatmap_width)
    }
  )
  
})

ui.R

This file handles the user interface and allows users to upload their data file and make adjustments to the heatmap.

#
# This is the user-interface definition of a Shiny web application. You can
# run the application by clicking 'Run App' above.
#
# Find out more about building applications with Shiny here:
# 
#    http://shiny.rstudio.com/
#

library(shiny)

# Define UI for application that draws a histogram
shinyUI(fluidPage(
  
  # Application title
  titlePanel("Heatmap Cluster Analysis"),
  
  # Sidebar with a slider input for number of bins 
  sidebarLayout(
    sidebarPanel(
      strong("Warning"),
      br(),
      a("Please click for the disclaimer.", href = "TermsOfUse.pdf", target = "_blank"),
      
      p("This disclaimer governs your use of this website; by using this website, 
        you accept this disclaimer in full. If you disagree with any part of this disclaimer, 
        you should not proceed with using this site. We reserve the right to modify these terms 
        at any time. You should, therefore, check periodically for changes. By using this site 
        after we post any changes, you agree to accept those changes, whether or not you have reviewed them."),
      br(),
      strong("Agreeing to Terms"),
      p("In clicking an 'I accept' button, you are stating that you have read and agreed to the conditions 
        listed in the Website Terms of Use."),
      
      radioButtons("disclaimer", NULL, c("I don't accept" = "!accept","I accept" = "accept")),
      
      conditionalPanel("input.disclaimer=='accept'",
                       fileInput('file1', 'Choose a .xlsx or .csv file',
                                 accept=c('application/x-spss-sav','text/csv', 'application/octet-stream',
                                          'text/comma-separated-values',
                                          'text/tab-separated-values',
                                          'text/plain',
                                          '.csv',
                                          '.xlsx',
                                          '.tsv')),
                       p("Click on the variables to see their annotations."),
                       uiOutput('radio'),
                       p("Change the font sizes of the row names"),
                       uiOutput('rowFont'),
                       p("Change the font sizes of the column names"),
                       uiOutput('columnFont'),
                       p("Turn clustering on or off for rows and/or columns"),
                       uiOutput('clustering')
      ),
      
      em("This application is created by and copyrighted 2016 by Alex J. Bowers, Yihan Zhao, and Eric Ho"),
      
      br(),
      br(),
      
      em("When using this website, please cite this application as:"),

      p("Ho, E., Bowers, A.J., & Zhao, Y. (2016) Heatmap cluster analysis [shiny R application]. https://ohrice.shinyapps.io/Heatmap/"),
      
      em("This website is based on the work from the Bowers Research Group in Education 
        Leadership Data Analytics at Teachers College, Columbia University, New York, New York. 
        For more information on the work of the research group, please visit"), 
        a("http://www.tc.columbia.edu/faculty/ab3764/", href ="http://www.tc.columbia.edu/faculty/ab3764/", target ="_blank")
      
      
      
    ),
    
    
    mainPanel(
       p("You must accept the terms and conditions in the pdf to use this app."),
       br(),
   
       conditionalPanel("input.disclaimer=='accept'",
        p("A preview of your data will be shown here once you upload a file. Please make sure it is formatted correctly."),
        tableOutput("head"),
        p("Below is the heatmap of your data once you upload a file. The heatmap may take some time to appear."),
        plotOutput("heatmap", width = "100%")
       ),
       
       conditionalPanel("input.disclaimer == '!accept'",
                        p("This is an example."),
                        tableOutput("headdemo"),
                        plotOutput("demo")
                        #tags$iframe(src="TermsOfUse.pdf", height = '0px', '0px')
                        )
                  

    )
  )
))

data (directory)

This folder contains sample datasets, including the one used to generate the example heatmap on the landing page of the application.

rsconnect (directory)

This folder contains information required to deploy the application to www.shinyapps.io

www (directory)

This folder contains the disclaimer that users are required to accept to use the application.