---
title: "matrix-clustering-CONSTANT-vignette"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{matrix-clustering-CONSTANT-vignette}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup}
library(tip)

# Draw one random matrix from a matrix normal distribution whose mean matrix
# has every entry equal to `mu` and whose row and column covariance matrices
# are identity matrices.
#   mu:       scalar used to fill the mean matrix
#   num_rows: number of rows of the generated matrix
#   num_cols: number of columns of the generated matrix
random_mat_normal <- function(mu, num_rows, num_cols) {
  LaplacesDemon::rmatrixnorm(
    M = matrix(mu, nrow = num_rows, ncol = num_cols),
    U = diag(num_rows),
    V = diag(num_cols)
  )
}

# Generate 3 clusters of matrices (10 matrices per cluster, each m x p)
p <- 5
m <- 3
c1 <- lapply(1:10, function(x) {
  random_mat_normal(mu = 0, num_rows = m, num_cols = p)
})
c2 <- lapply(1:10, function(x) {
  random_mat_normal(mu = 5, num_rows = m, num_cols = p)
})
c3 <- lapply(1:10, function(x) {
  random_mat_normal(mu = -5, num_rows = m, num_cols = p)
})

# Put all the data into a list
data_list <- c(c1, c2, c3)

# Create a vector of true labels. True labels are only necessary
# for constructing network graphs that incorporate the true labels;
# this is often useful for research.
true_labels <- c(
  rep("Cluster 1", length(c1)),
  rep("Cluster 2", length(c2)),
  rep("Cluster 3", length(c3))
)

# Pairwise distance matrix between subjects. Only the pairs with j >= i are
# computed; each value is mirrored so that the matrix is symmetric.
num_subjects <- length(true_labels)
distance_matrix <- matrix(NA, nrow = num_subjects, ncol = num_subjects)
for (i in seq_len(num_subjects)) {
  for (j in i:num_subjects) {
    distance_matrix[i, j] <- SMFilter::FDist2(
      mX = data_list[[i]],
      mY = data_list[[j]]
    )
    distance_matrix[j, i] <- distance_matrix[i, j]
  }
}

# Compute the temperature parameter estimate: the reciprocal of the median
# of the off-diagonal (upper-triangle) distances
temperature <- 1 / median(distance_matrix[upper.tri(distance_matrix)])

# For each subject, compute the point estimate for the number of similar
# subjects using univariate multiple change point detection
init_num_neighbors <- get_cpt_neighbors(.distance_matrix = distance_matrix)

# Set the number of burn-in iterations in the Gibbs sampler
# RECOMMENDATION: burn >= 1000
burn <- 10

# Set the number of sampling iterations in the Gibbs sampler
# RECOMMENDATION: samples >= 1000
samples <- 10

# Set the subject names ("1", "2", ..., one per row of the distance matrix)
names_subjects <- as.character(seq_len(nrow(distance_matrix)))

# Run TIP clustering using only the prior
# --> That is, the likelihood function is constant
tip1 <- tip(
  .data = data_list,
  .burn = burn,
  .samples = samples,
  .similarity_matrix = exp(-1.0 * temperature * distance_matrix),
  .init_num_neighbors = init_num_neighbors,
  .likelihood_model = "CONSTANT",
  .subject_names = names_subjects,
  .num_cores = 1
)
```

```{r}
# Produce plots for the Bayesian Clustering Model
tip_plots <- plot(tip1)
```

```{r}
# View the posterior distribution of the number of clusters
tip_plots$histogram_posterior_number_of_clusters
```

```{r}
# View the trace plot with respect to the posterior number of clusters
tip_plots$trace_plot_posterior_number_of_clusters
```

```{r}
# Extract posterior cluster assignments using the Posterior Expected
# Adjusted Rand (PEAR) index
cluster_assignments <- mcclust::maxpear(
  psm = tip1@posterior_similarity_matrix
)$cl

# If the true labels are available, then show the cluster result via a
# contingency table
table(
  data.frame(
    true_label = true_labels,
    cluster_assignment = cluster_assignments
  )
)
```

```{r}
# Create the one component graph with minimum entropy
partition_list <- partition_undirected_graph(
  .graph_matrix = tip1@posterior_similarity_matrix,
  .num_components = 1,
  .step_size = 0.001
)
```

```{r}
# Associate class labels and colors for the plot
class_palette_colors <- c(
  "Cluster 1" = "blue",
  "Cluster 2" = "green",
  "Cluster 3" = "red"
)

# Associate class labels and shapes for the plot
class_palette_shapes <- c(
  "Cluster 1" = 19,
  "Cluster 2" = 18,
  "Cluster 3" = 17
)

# Visualize the posterior similarity matrix by constructing a graph plot of
# the one-cluster graph. The true labels are used here (below they are not).
ggnet2_network_plot(
  .matrix_graph = partition_list$partitioned_graph_matrix,
  .subject_names = NA,
  .subject_class_names = true_labels,
  .class_colors = class_palette_colors,
  .class_shapes = class_palette_shapes,
  .node_size = 2,
  .add_node_labels = FALSE
)
```

```{r}
# If true labels are not available, then construct a network plot
# of the one-cluster graph without any class labels.
# Note: Subject labels may be suppressed using .add_node_labels = FALSE.
ggnet2_network_plot(
  .matrix_graph = partition_list$partitioned_graph_matrix,
  .subject_names = names_subjects,
  .node_size = 2,
  .add_node_labels = TRUE
)
```