--- title: "usarrests-NIW-vignette" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{usarrests-NIW-vignette} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} # Import the TIP library library(tip) # Import the US Arrests dataset data(USArrests) # Convert the data to a matrix X <- data.matrix(USArrests) # Compute the distance matrix distance_matrix <- data.matrix(dist(X)) # Compute the temperature parameter estiamte temperature <- 1/median(distance_matrix[upper.tri(distance_matrix)]) # For each subject, compute the point estimate for the number of similar # subjects using univariate multiple change point detection (i.e.) init_num_neighbors = get_cpt_neighbors(.distance_matrix = distance_matrix) # Set the number of burn-in iterations in the Gibbs samlper # RECOMENDATION: *** burn >= 1000 *** burn <- 10 # Set the number of sampling iterations in the Gibbs sampler # RECOMENDATION: *** samples >= 1000 *** samples <- 10 # Extract the state names names_subjects <- rownames(USArrests) # Run TIP clustering using only the prior # --> That is, the likelihood function is constant tip1 <- tip(.data = data.matrix(X), .burn = burn, .samples = samples, .similarity_matrix = exp(-1.0*temperature*distance_matrix), .init_num_neighbors = init_num_neighbors, .likelihood_model = "NIW", .subject_names = names_subjects, .num_cores = 1) ``` ```{r} # Produce plots for the Bayesian Clustering Model tip_plots <- plot(tip1) ``` ```{r} # View the posterior distribution of the number of clusters tip_plots$trace_plot_posterior_number_of_clusters ``` ```{r} # View the trace plot with respect to the posterior number of clusters tip_plots$trace_plot_posterior_number_of_clusters ``` ```{r} # Extract posterior cluster assignments using the Posterior Expected Adjusted Rand (PEAR) index cluster_assignments <- mcclust::maxpear(psm = tip1@posterior_similarity_matrix)$cl # Create a list where each element stores the cluster assignments cluster_assignment_list <- list() for(k in 1:length(unique(cluster_assignments))){ cluster_assignment_list[[k]] <- names_subjects[which(cluster_assignments == k)] } cluster_assignment_list ``` ```{r} # Create the one component graph with minimum entropy partition_list <- partition_undirected_graph(.graph_matrix = tip1@posterior_similarity_matrix, .num_components = 1, .step_size = 0.001) ``` ```{r} # View the state names # names_subjects # Create a vector of true region labels to see if there is a pattern true_region <- c("Southeast", "West", "Southwest", "Southeast", "West", "West", "Northeast", "Northeast", "Southeast", "Southeast", "West", "West", "Midwest", "Midwest", "Midwest", "Midwest", "Southeast", "Southeast", "Northeast", "Northeast", "Northeast", "Midwest", "Midwest", "Southeast", "Midwest", "West", "Midwest", "West", "Northeast", "Northeast", "Southwest", "Northeast", "Southeast", "Midwest", "Midwest", "Southwest", "West", "Northeast", "Northeast", "Southeast", "Midwest", "Southeast", "Southwest", "West", "Northeast", "Southeast", "West", "Southeast", "Midwest", "West") names_subjects # Associate class labels and colors for the plot class_palette_colors <- c("Northeast" = "blue", "Southeast" = "red", "Midwest" = "purple", "Southwest" = "orange", "West" = "green") # Associate class labels and shapes for the plot class_palette_shapes <- c("Northeast" = 15, "Southeast" = 16, "Midwest" = 17, "Southwest" = 18, "West" = 19) # Visualize the posterior similarity matrix by constructing a graph plot of # the one-cluster graph. The true labels are used here (below they are not). ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix, .subject_names = names_subjects, .subject_class_names = true_region, .class_colors = class_palette_colors, .class_shapes = class_palette_shapes, .node_size = 2, .add_node_labels = TRUE) ``` ```{r} # Visualize the posterior similarity matrix by constructing a graph plot of # the one-cluster graph. The true labels are used here (below they are not). # Remove the subject names with .add_node_labels = FALSE ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix, .subject_names = names_subjects, .subject_class_names = true_region, .class_colors = class_palette_colors, .class_shapes = class_palette_shapes, .node_size = 2, .add_node_labels = FALSE) ``` ```{r} # Construct a network plot without class labels # Note: Subject labels may be suppressed using .add_node_labels = FALSE. ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix, .subject_names = names_subjects, .node_size = 2, .add_node_labels = TRUE) ```