#' @title 
#' Create trans_diff object for the differential analysis on the taxonomic abundance.
#'
#' @description
#' This class is a wrapper for a series of differential abundance tests and indicator analysis methods, including the non-parametric Kruskal-Wallis Rank Sum Test,
#' Dunn's Kruskal-Wallis Multiple Comparisons based on the FSA package, LEfSe based on Segata et al. (2011) <doi:10.1186/gb-2011-12-6-r60>,
#' random forest <doi:10.1016/j.geoderma.2018.09.035>, metastat based on White et al. (2009) <doi:10.1371/journal.pcbi.1000352> and
#' the method in the R package metagenomeSeq, Paulson et al. (2013) <doi:10.1038/nmeth.2658>.
#' 
#' Authors: Chi Liu, Yang Cao, Chenhao Li
#' 
#' @export
trans_diff <- R6Class(classname = "trans_diff",
	public = list(
		#' @param dataset the object of \code{\link{microtable}} Class.
		#' @param method default "lefse"; one of "lefse", "rf", "KW", "KW_dunn", "metastat" or "mseq"; see the following details:
		#'   \describe{
		#'     \item{\strong{'lefse'}}{from Segata et al. (2011) <doi:10.1186/gb-2011-12-6-r60>}
		#'     \item{\strong{'rf'}}{random forest, from An et al. (2019) <doi:10.1016/j.geoderma.2018.09.035>}
		#'     \item{\strong{'KW'}}{Kruskal-Wallis Rank Sum Test (groups > 2) or Wilcoxon Rank Sum Tests (groups = 2) for a specific taxonomic level or 
		#'       all levels of microtable$taxa_abund}
		#'     \item{\strong{'KW_dunn'}}{Dunn's Kruskal-Wallis Multiple Comparisons based on the FSA package}
		#'     \item{\strong{'metastat'}}{White et al. (2009) <doi:10.1371/journal.pcbi.1000352>}
		#'     \item{\strong{'mseq'}}{the method based on the zero-inflated log-normal model in metagenomeSeq package.}
		#'   }
		#' @param group default NULL; the column name in sample_table used as the sample group for the main comparison.
		#' @param taxa_level default "all"; use abundance data at all taxonomic ranks; For testing at a specific rank, provide taxonomic rank name, such as "Genus".
		#' @param filter_thres default 0; the relative abundance threshold used for method = "lefse" or "rf". 
		#' @param lefse_subgroup default NULL; sample subgroup used for the sub-comparison in lefse; Segata et al. (2011) <doi:10.1186/gb-2011-12-6-r60>.
		#' @param alpha default .05; significance threshold.
		#' @param lefse_min_subsam default 10; sample numbers required in the subgroup test.
		#' @param lefse_norm default 1000000; scale value in lefse.
		#' @param nresam default .6667; the ratio of samples drawn in each bootstrap for LEfSe or random forest.
		#' @param boots default 30; bootstrap test number for lefse or rf.
		#' @param rf_ntree default 1000; see ntree in randomForest function of randomForest package.
		#' @param metastat_taxa_level default "Genus"; taxonomic rank level used in metastat test; White et al. (2009) <doi:10.1371/journal.pcbi.1000352>.
		#' @param group_choose_paired default NULL; a vector used for selecting the required groups for paired testing, only used for metastat or mseq.
		#' @param mseq_adjustMethod default "fdr"; Method to adjust p-values by. Default is "fdr". 
		#'   Options include "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none".
		#' @param mseq_count default 1; Filter features to have at least 'counts' counts.; see the count parameter in MRcoefs function of metagenomeSeq package.
		#' @param ... parameters passed to kruskal.test function or wilcox.test function (method = "KW") or dunnTest function of FSA package (method = "KW_dunn").
		#' @return res_rf, res_lefse, res_abund, res_diff, res_metastat, or res_mseq in trans_diff object, depending on the method.
		#' @examples
		#' \donttest{
		#' data(dataset)
		#' t1 <- trans_diff$new(dataset = dataset, method = "lefse", group = "Group")
		#' }
		initialize = function(
			dataset = NULL,
			method = c("lefse", "rf", "KW", "KW_dunn", "metastat", "mseq")[1],
			group = NULL,
			taxa_level = "all",
			filter_thres = 0,
			lefse_subgroup = NULL,
			alpha = 0.05,
			lefse_min_subsam = 10,
			lefse_norm = 1000000,
			nresam = 0.6667,
			boots = 30,
			rf_ntree = 1000,
			metastat_taxa_level = "Genus",
			group_choose_paired = NULL,
			mseq_adjustMethod = "fdr",
			mseq_count = 1,
			...
			){
			# Run the differential test selected by `method` and store the result in the object:
			#   KW / KW_dunn -> self$res_diff
			#   rf           -> self$res_rf and self$res_abund
			#   lefse        -> self$res_lefse and self$res_abund
			#   metastat     -> self$res_metastat and self$res_metastat_group_matrix
			#   mseq         -> self$res_mseq and self$res_mseq_group_matrix
			if(is.null(dataset)){
				stop("No dataset provided!")
			}
			
			method <- match.arg(method, c("lefse", "rf", "KW", "KW_dunn", "metastat", "mseq"))

			sampleinfo <- dataset$sample_table
			# every branch below indexes sample_table by group, so fail early with a clear message
			if(is.null(group)){
				stop("No group provided! Please provide the group parameter!")
			}
			if(is.character(group) && !all(group %in% colnames(sampleinfo))){
				stop("The group: ", paste0(group, collapse = ", "), " is not found in the column names of sample_table!")
			}
			sampleinfo[, group] %<>% as.character
			# ---- prepare the abundance table for the taxa_abund based methods ----
			if(grepl("lefse|rf|KW|KW_dunn", method, ignore.case = TRUE)){
				if(is.null(dataset$taxa_abund)){
					stop("Please first calculate taxa_abund! see cal_abund function in microtable class!")
				}
				if(grepl("all", taxa_level, ignore.case = TRUE)){
					abund_table <- do.call(rbind, unname(dataset$taxa_abund))
				}else{
					abund_table <- dataset$taxa_abund[[taxa_level]]
				}
				if(filter_thres > 0){
					if(filter_thres >= 1){
						stop("Parameter filter_thres represents relative abudance. It should be smaller than 1 !")
					}else{
						abund_table %<>% .[apply(., 1, mean) > filter_thres, ]
					}
				}
				if(grepl("lefse", method, ignore.case = TRUE)){
					abund_table %<>% {. * lefse_norm}
					self$lefse_norm <- lefse_norm
				}
				# remove the taxa with unresolved names
				abund_table %<>% {.[!grepl("__$|uncultured$|Incertae..edis$|_sp$", rownames(.), ignore.case = TRUE), ]}
			}
			# ---- KW and KW_dunn: reuse the test implemented in trans_alpha ----
			if(method %in% c("KW", "KW_dunn")){
				tem_data <- clone(dataset)
				# use test method in trans_alpha
				tem_data$alpha_diversity <- as.data.frame(t(abund_table))
				tem_data1 <- suppressMessages(trans_alpha$new(dataset = tem_data, group = group))
				suppressMessages(tem_data1$cal_diff(method = method, ...))
				self$res_diff <- tem_data1$res_alpha_diff
				message('The result is stored in object$res_diff ...')
			}
			# ---- lefse and rf: first screen the taxa with the per-taxon test ----
			if(grepl("lefse|rf", method, ignore.case = TRUE)){
				# differential test
				group_vec <- sampleinfo[, group] %>% as.factor
				message("Start differential test for ", group, " ...")
				res_class <- lapply(seq_len(nrow(abund_table)), function(x) private$test_mark(abund_table[x, ], group_vec))
				pvalue <- unlist(lapply(res_class, function(x) x$p_value))
				pvalue[is.nan(pvalue)] <- 1
				# select significant taxa
				sel_taxa <- pvalue < alpha
				message("Total ", sum(sel_taxa), " biomarkers found ...")
				if(sum(sel_taxa) == 0){
					stop("No significant biomarkers found! Stop running!")
				}
				# save abund_table for the cladogram
				self$abund_table <- abund_table
				abund_table_sub <- abund_table[sel_taxa, ]
				pvalue_sub <- pvalue[sel_taxa]
			}
			# ---- random forest ----
			if(grepl("rf", method, ignore.case = TRUE)){
				names(pvalue_sub) <- rownames(abund_table_sub)
				# change the name in case of additional problem from the taxonomic names
				nametable <- cbind.data.frame(name = rownames(abund_table_sub), repl = paste0("t", seq_len(nrow(abund_table_sub))), stringsAsFactors = FALSE)
				rownames(nametable) <- nametable$repl
				predictors <- t(abund_table_sub)
				colnames(predictors) <- nametable$repl
				res <- NULL
				if(all(table(as.character(sampleinfo[, group])) < 4)){
					if(nresam < 1){
						message("The sample number in all your group less than 4, automatically set nresam = 1 for random forest analysis !")
						nresam <- 1
					}
				}
				for(num in seq_len(boots)){
					# resampling
					sample_names_resample <- rownames(predictors)[base::sample(seq_len(nrow(predictors)), size = ceiling(nrow(predictors) * nresam))]
					predictors_sub <- predictors[sample_names_resample, ]
					sampleinfo_resample <- sampleinfo[sample_names_resample, , drop = FALSE]
					# make sure the groups and samples numbers right
					if(length(unique(sampleinfo_resample[, group])) != length(unique(sampleinfo[, group])) || min(table(sampleinfo_resample[, group])) < 2){
						next
					}
					rf_data <- data.frame(response = as.factor(sampleinfo_resample[, group]), predictors_sub, stringsAsFactors = FALSE)
					tem_classify <- randomForest::randomForest(response~., data = rf_data, ntree = rf_ntree)
					# use importance to evaluate
					imp <- randomForest::importance(tem_classify)
					colnames(imp)[1] <- num
					if(is.null(res)){
						res <- imp
					}else{
						res <- cbind(res, imp[rownames(res), , drop = FALSE])
					}
				}
				# every resample can be skipped above; report that explicitly instead of failing inside apply
				if(is.null(res)){
					stop("No valid resampling obtained in the random forest analysis! Please consider increasing nresam or boots!")
				}
				res <- apply(res, 1, mean) %>% as.data.frame	
				res <- data.frame(Taxa = rownames(res), MeanDecreaseGini = res[, 1], stringsAsFactors = FALSE)
				res$Taxa <- nametable[res$Taxa, "name"]
				imp_sort <- dplyr::arrange(res, dplyr::desc(MeanDecreaseGini))
				rownames(imp_sort) <- imp_sort$Taxa
				imp_sort$pvalue <- pvalue_sub[as.character(imp_sort$Taxa)]
				self$res_rf <- imp_sort
				message('The result is stored in object$res_rf ...')
			}
			# ---- LEfSe ----
			if(grepl("lefse", method, ignore.case = TRUE)){
				class_taxa_median_sub <- lapply(res_class, function(x) x$med) %>% do.call(cbind, .) %>% .[, sel_taxa]
				all_class_pairs <- combn(unique(as.character(group_vec)), 2)
				# check the difference among subgroups
				if(!is.null(lefse_subgroup)){
					message("Start lefse subgroup biomarkers check for ", lefse_subgroup, " ...")
					all_sub_number <- as.data.table(sampleinfo)[, .N, by = c(group, lefse_subgroup)] %>% as.data.frame %>% .$N
					if(all(all_sub_number < lefse_min_subsam)){
						stop("All sample numbers for subgroups < ", lefse_min_subsam, "! Please consider use small lefse_min_subsam parameter!")
					}
					remove_list_total <- list()
					# for each group paires
					for(i in seq_len(ncol(all_class_pairs))){
						y1 <- all_class_pairs[, i]
						y1_sub_pairs <- expand.grid(
							unique(sampleinfo[sampleinfo[, group] == y1[1], lefse_subgroup]), 
							unique(sampleinfo[sampleinfo[, group] == y1[2], lefse_subgroup]), 
							stringsAsFactors = FALSE
							) %>% t
						# keep only the subgroup pairs in which both sides have enough samples
						y1_sub_pairs <- y1_sub_pairs[, unlist(lapply(seq_len(ncol(y1_sub_pairs)), function(x){
							ifelse(any(c(sum(sampleinfo[, group] == y1[1] & sampleinfo[, lefse_subgroup] == y1_sub_pairs[1, x]) < lefse_min_subsam, 
								sum(sampleinfo[, group] == y1[2] & sampleinfo[, lefse_subgroup] == y1_sub_pairs[2, x]) < lefse_min_subsam)), FALSE, TRUE)
						})), drop = FALSE]
						if(ncol(y1_sub_pairs) == 0) next
						res_sub_total <- list()
						# check each subgroup pairs under fixed group pair condition
						for(j in seq_len(ncol(y1_sub_pairs))){
							y2 <- y1_sub_pairs[, j]
							abund_table_sub_y2 <- abund_table_sub[, c(
								rownames(sampleinfo[sampleinfo[, group] == y1[1] & sampleinfo[, lefse_subgroup] == y2[1], ]), 
								rownames(sampleinfo[sampleinfo[, group] == y1[2] & sampleinfo[, lefse_subgroup] == y2[2], ])
								)]
							group_vec_sub2 <- c(sampleinfo[sampleinfo[, group] == y1[1] & sampleinfo[, lefse_subgroup] == y2[1], group], 
								sampleinfo[sampleinfo[, group] == y1[2] & sampleinfo[, lefse_subgroup] == y2[2], group])
							res_sub <- lapply(seq_len(nrow(abund_table_sub_y2)), function(x) private$test_mark(abund_table_sub_y2[x,], group_vec_sub2))
							res_sub_total[[j]] <- res_sub
						}
						# direction of the difference between the two groups in the whole data
						raw_median <- class_taxa_median_sub[y1, ] %>% 
							{.[1, ] > .[2, ]} %>% 
							as.vector
						# TRUE when a subgroup pair shows the same direction as the whole data
						check_median_sub <- sapply(res_sub_total, function(x) unlist(lapply(x, function(y) {y$med[y1, 1] %>% {.[1] > .[2]}}))) %>% as.data.frame
						check_median_sub[] <- lapply(check_median_sub, function(x) x == raw_median)
						check_p_sub <- sapply(res_sub_total, function(x) unlist(lapply(x, function(y) y$p_value))) %>% as.data.frame
						# remove a taxon when any subgroup pair with the opposite direction is itself significant
						remove_list <- unlist(lapply(seq_len(nrow(check_median_sub)), function(x){
							if(all(unlist(check_median_sub[x, ]))){
								FALSE
							}else{
								if(any(check_p_sub[x, !unlist(check_median_sub[x, ])] < alpha)){
									TRUE
								}else{
									FALSE
								}
							}
						}))
						remove_list_total[[i]] <- remove_list
					}
					if(!identical(remove_list_total, list())){
						remove_list_total %<>% do.call(cbind, .) %>% apply(., 1, any)
						message("Remove ", sum(remove_list_total), " biomarkers after subgroup check ...")
						abund_table_sub %<>% .[!remove_list_total, ]
						if(nrow(abund_table_sub) == 0){
							stop("No biomarkers remained after subgroup check! stop running!")
						}
						pvalue_sub %<>% .[!remove_list_total]
						class_taxa_median_sub %<>% .[, !remove_list_total]
					}
				}
				res_lda <- list()
				# bootstrap default 30 times
				for(num in seq_len(boots)){
					res_lda_pair <- list()
					# resampling samples
					sample_names_resample <- colnames(abund_table_sub)[base::sample(seq_len(ncol(abund_table_sub)), size = ceiling(ncol(abund_table_sub) * nresam))]
					abund_table_sub_resample <- abund_table_sub[, sample_names_resample]
					sampleinfo_resample <- sampleinfo[sample_names_resample, , drop = FALSE]
					# make sure the groups and samples numbers available					
					if(sum(table(as.character(sampleinfo_resample[, group])) > 1) < 2){
						# one NA per group pair, so that the x[[p]] lookup in the final summary stays valid for every pair
						res_lda[[num]] <- rep(list(NA), ncol(all_class_pairs))
						next
					}
					# cycle all paired groups
					for(i in seq_len(ncol(all_class_pairs))){
						sel_samples <- sampleinfo_resample[, group] %in% all_class_pairs[, i]
						# make sure the groups and samples numbers available
						if(length(table(as.character(sampleinfo_resample[sel_samples, group]))) < 2){
							res_lda_pair[[i]] <- NA
							next
						}
						if(min(table(as.character(sampleinfo_resample[sel_samples, group]))) < 2){
							res_lda_pair[[i]] <- NA
							next
						}
						group_vec_lda <- sampleinfo_resample[sel_samples, group] %>% as.character %>% as.factor
						abund_table_sub_lda <- abund_table_sub_resample[, sel_samples]
						# constant features cannot be used in LDA
						abund_table_sub_lda %<>% .[apply(., 1, sd) > 1.0e-10, ]
						if(is.null(lefse_subgroup)){
							abund1 <- cbind.data.frame(t(abund_table_sub_lda), Group = group_vec_lda)
						}else{
							subgroup_vec <- sampleinfo_resample[sel_samples, lefse_subgroup] %>% as.character %>% as.factor
							# consider subgroup as a independent variable
							abund1 <- cbind.data.frame(t(abund_table_sub_lda), Group = group_vec_lda, lefse_subgroup = subgroup_vec)
						}
						# LDA analysis; a failed fit is recorded as NA for this pair and skipped
						mod1 <- tryCatch(MASS::lda(Group ~ ., abund1, tol = 1.0e-10), error = function(e) NULL)
						if(is.null(mod1)){
							res_lda_pair[[i]] <- NA
							next
						}
						# calculate effect size
						w <- mod1$scaling[,1]
						w_unit <- w/sqrt(sum(w^2))
						w_unit %<>% {.[!grepl("lefse_subgroup", names(.))]}
						ss <- abund1[, !colnames(abund1) %in% c("Group", "lefse_subgroup")]
						xy_matrix <- as.matrix(ss)
						LD <- xy_matrix %*% w_unit
						effect_size <- tapply(LD, group_vec_lda, mean) %>% as.vector %>% {.[1] - .[2]} %>% abs
						# take the absolute value of the whole product; %>% binds tighter than *,
						# so 'w_unit * effect_size %>% abs' would only apply abs to effect_size
						coeff <- abs(w_unit * effect_size)
						coeff[is.nan(coeff)] <- 0
						names(coeff) %<>% gsub("^`|`$", "", .)
						rres <- mod1$means %>% as.data.frame
						colnames(rres) %<>% gsub("^`|`$", "", .)
						rres <- rres[, rownames(abund_table_sub_lda)]
						rres1 <- apply(rres, 2, function(x) abs(x[1] - x[2]))
						res_lda_pair[[i]] <- (rres1 + coeff[names(rres1)]) *0.5
					}
					res_lda[[num]] <- res_lda_pair
				}
				# obtain the final lda value
				res <- sapply(rownames(abund_table_sub), function(k){
					unlist(lapply(seq_len(ncol(all_class_pairs)), function(p){
						unlist(lapply(res_lda, function(x){ x[[p]][k]})) %>% .[!is.na(.)] %>% mean
					})) %>% 
					.[!is.na(.)] %>% 
					.[!is.nan(.)] %>% 
					max
				})
				res <- sapply(res, function(x) {log10(1 + abs(x)) * ifelse(x > 0, 1, -1)})
				res1 <- cbind.data.frame(Group = apply(class_taxa_median_sub, 2, function(x) rownames(class_taxa_median_sub)[which.max(x)]), 
					pvalue = pvalue_sub, LDA = res)
				res1 %<>% .[order(.$LDA, decreasing = TRUE), ]
				res1 <- cbind.data.frame(Taxa = rownames(res1), res1)
				message("Finished, minimum LDA score: ", range(res1$LDA)[1], " maximum LDA score: ", range(res1$LDA)[2])
				self$res_lefse <- res1
				message('The lefse result is stored in object$res_lefse ...')
			}
			# ---- lefse and rf: abundance summary of the selected taxa for plotting ----
			if(grepl("lefse|rf", method, ignore.case = TRUE)){
				if(grepl("lefse", method, ignore.case = TRUE)){
					# undo the lefse_norm scaling applied above
					res_abund <- reshape2::melt(rownames_to_column(abund_table_sub/lefse_norm, "Taxa"), id.vars = "Taxa")
				}else{
					res_abund <- reshape2::melt(rownames_to_column(abund_table_sub, "Taxa"), id.vars = "Taxa")				
				}
				colnames(res_abund) <- c("Taxa", "Sample", "Abund")
				res_abund <- suppressWarnings(dplyr::left_join(res_abund, rownames_to_column(sampleinfo), by = c("Sample" = "rowname")))
				res_abund <- microeco:::summarySE_inter(res_abund, measurevar = "Abund", groupvars = c("Taxa", group))
				colnames(res_abund)[colnames(res_abund) == group] <- "Group"
				self$res_abund <- res_abund
				message('The abundance is stored in object$res_abund ...')
			}
			# ---- metastat and mseq: build the matrix of paired groups ----
			if(grepl("metastat|mseq", method, ignore.case = TRUE)){
				if(is.null(group_choose_paired)){
					all_name <- combn(unique(as.character(sampleinfo[, group])), 2)
				}else{
					all_name <- combn(unique(group_choose_paired), 2)
				}
				output <- data.frame()			
			}
			# ---- metastat ----
			if(grepl("metastat", method, ignore.case = TRUE)){
				self$metastat_taxa_level <- metastat_taxa_level
				# transform data
				ranknumber <- which(colnames(dataset$tax_table) %in% metastat_taxa_level)
				# an unknown rank would give 1:integer(0) below
				if(length(ranknumber) == 0){
					stop("The metastat_taxa_level: ", paste0(metastat_taxa_level, collapse = ", "), " is not found in the column names of tax_table!")
				}
				abund <- dataset$otu_table
				tax <- dataset$tax_table[, 1:ranknumber, drop=FALSE]
				merged_taxonomy <- apply(tax, 1, paste, collapse="|")
				abund1 <- cbind.data.frame(Display = merged_taxonomy, abund) %>% 
					reshape2::melt(id.var = "Display", value.name= "Abundance", variable.name = "Sample")
				# sum the abundance of the features with the same lineage in each sample
				abund1 <- data.table(abund1)[, sum_abund:=sum(Abundance), by=list(Display, Sample)] %>% 
					.[, c("Abundance"):=NULL] %>% 
					setkey(Display, Sample) %>% 
					unique() %>% 
					as.data.frame()
				new_abund <- as.data.frame(data.table::dcast(data.table(abund1), Display~Sample, value.var= list("sum_abund"))) %>% 
					`row.names<-`(.[,1]) %>% 
					.[,-1, drop = FALSE]
				new_abund <- new_abund[order(apply(new_abund, 1, mean), decreasing = TRUE), rownames(sampleinfo), drop = FALSE]

				message("Total ", ncol(all_name), " paired group for calculation ...")
				for(i in seq_len(ncol(all_name))) {
					message(paste0("Run ", i, " : ", paste0(as.character(all_name[,i]), collapse = " vs "), " ...\n"))
					use_data <- new_abund[ , unlist(lapply(as.character(all_name[,i]), function(x) which(as.character(sampleinfo[, group]) %in% x)))]
					use_data %<>% .[!grepl("__$", rownames(.)), ]
					use_data <- use_data[apply(use_data, 1, sum) != 0, ]

					# sample number of the first group plus 1, passed to calculate_metastat as g
					g <- sum(as.character(sampleinfo[, group]) == as.character(all_name[1, i])) + 1
					# calculate metastat
					res <- private$calculate_metastat(inputdata = use_data, g = g)
					add_name <- paste0(as.character(all_name[, i]), collapse = " vs ") %>% rep(., nrow(res))
					res <- cbind.data.frame(compare = add_name, res)
					output <- rbind.data.frame(output, res)
				}
				output %<>% dropallfactors(unfac2num = TRUE)
				self$res_metastat <- output
				message('The metastat result is stored in object$res_metastat ...')
				self$res_metastat_group_matrix <- all_name
				message('The metastat group information is stored in object$res_metastat_group_matrix ...')
			}
			# ---- metagenomeSeq ----
			if(grepl("mseq", method, ignore.case = TRUE)){
				# require() is kept on purpose: the metagenomeSeq functions below are called unqualified
				if(!require("metagenomeSeq")){
					stop("metagenomeSeq package not installed")
				}
				message("Total ", ncol(all_name), " paired group for calculation ...")
				for(i in seq_len(ncol(all_name))) {
					message(paste0("Run ", i, " : ", paste0(as.character(all_name[,i]), collapse = " vs "), " ...\n"))
					use_dataset <- clone(dataset)
					use_dataset$sample_table %<>% .[.[, group] %in% as.character(all_name[,i]), ]
					use_dataset$tidy_dataset()
					obj <- newMRexperiment(
						use_dataset$otu_table, 
						phenoData= AnnotatedDataFrame(use_dataset$sample_table), 
						featureData = AnnotatedDataFrame(use_dataset$tax_table)
						)
					## Normalization and Statistical testing
					obj_1 <- cumNorm(obj)
					pd <- pData(obj)
					colnames(pd)[which(colnames(pd) == group)] <- "Group"
					# construct linear model
					mod <- model.matrix(~1 + Group, data = pd)
					objres1 <- fitFeatureModel(obj_1, mod)
					# extract the result
					tb <- data.frame(logFC = objres1@fitZeroLogNormal$logFC, se = objres1@fitZeroLogNormal$se)
					p <- objres1@pvalues
					if (mseq_adjustMethod == "ihw-ubiquity" | mseq_adjustMethod == "ihw-abundance") {
						padj <- MRihw(objres1, p, mseq_adjustMethod, 0.1)
					} else {
						padj <- p.adjust(p, method = mseq_adjustMethod)
					}
					srt <- order(p, decreasing = FALSE)
					valid <- seq_along(padj)
					if (mseq_count > 0) {
						np <- rowSums(objres1@counts)
						valid <- intersect(valid, which(np >= mseq_count))
					}
					srt <- srt[which(srt %in% valid)]
					res <- cbind(tb[, 1:2], p)
					res <- cbind(res, padj)
					res <- as.data.frame(res[srt, ])
					colnames(res) <- c(colnames(tb)[1:2], "pvalues", "adjPvalues")
					res <- cbind.data.frame(feature = rownames(res), res)
					rownames(res) <- NULL
					add_name <- paste0(as.character(all_name[, i]), collapse = " vs ") %>% rep(., nrow(res))
					res <- cbind.data.frame(compare = add_name, res)
					output <- rbind.data.frame(output, res)
				}
				output %<>% dropallfactors(unfac2num = TRUE)
				self$res_mseq <- output
				message('The result is stored in object$res_mseq ...')
				self$res_mseq_group_matrix <- all_name
				message('The group information is stored in object$res_mseq_group_matrix ...')
			}
		},
		#' @description
		#' Plotting the abundance of differential taxa.
		#'
		#' @param method default NULL; "rf" or "lefse"; automatically check the method in the result.
		#' @param only_abund_plot default TRUE; if true, return only abundance plot; if false, return both indicator plot and abundance plot
		#' @param use_number default 1:10; vector, the taxa numbers used in the plot, 1:n.
		#' @param color_values colors for presentation.
		#' @param plot1_bar_color default "grey50"; the color for the plot 1.
		#' @param plot2_sig_color default "red"; the color for the significance in plot 2.
		#' @param plot2_sig_size default 1.2; the size for the significance in plot 2.
		#' @param axis_text_y default 10; the size for the y axis text.
		#' @param simplify_names default TRUE; whether use the simplified taxonomic name.
		#' @param keep_prefix default TRUE; whether retain the taxonomic prefix.
		#' @param group_order default NULL; a vector to order the legend in plot.
		#' @param plot2_barwidth default .9; the bar width in plot 2.
		#' @param add_significance default TRUE; whether add the significance asterisk; only available when only_abund_plot FALSE.
		#' @param use_se default TRUE; whether use SE in plot 2, if FALSE, use SD.
		#' @return ggplot.
		#' @examples
		#' \donttest{
		#' t1$plot_diff_abund(use_number = 1:10)
		#' }
		plot_diff_abund = function(
			method = NULL,
			only_abund_plot = TRUE,
			use_number = 1:10,
			color_values = RColorBrewer::brewer.pal(8, "Dark2"),
			plot1_bar_color = "grey50",
			plot2_sig_color = "red",
			plot2_sig_size = 1.2,
			axis_text_y = 10,
			simplify_names = TRUE,
			keep_prefix = TRUE,
			group_order = NULL,
			plot2_barwidth = .9,
			add_significance = TRUE,
			use_se = TRUE
			){
			# Plot the group abundance of the selected taxa (p2) and, optionally, the indicator value
			# (LDA score or MeanDecreaseGini) of the same taxa (p1).
			# Returns p2 when only_abund_plot is TRUE, otherwise list(p1 = p1, p2 = p2).
			data2 <- self$res_abund
			# when no method is provided, prefer the lefse result and fall back to random forest
			if(is.null(method)){
				if(!is.null(self$res_lefse)){
					method <- "lefse"
				}else if(!is.null(self$res_rf)){
					method <- "rf"
				}else{
					stop("No lefse or randomForest result found!")
				}
			}
			if(grepl("lefse", method, ignore.case = TRUE)){
				data1 <- self$res_lefse
				colnames(data1)[colnames(data1) == "LDA"] <- "Value"
				p1_xtile <- "LDA score"
			}else{
				if(grepl("rf", method, ignore.case = TRUE)){
					data1 <- self$res_rf
					colnames(data1)[colnames(data1) == "MeanDecreaseGini"] <- "Value"
					p1_xtile <- "MeanDecreaseGini"
				}else{
					stop("Provided method is not found, choose lefse or rf!")
				}
			}
			if(simplify_names){
				# keep only the last rank of the lineage
				data1$Taxa %<>% gsub(".*\\|", "", .)
				data2$Taxa %<>% gsub(".*\\|", "", .)
			}
			if(!keep_prefix){
				data1$Taxa %<>% gsub(".__", "", .)
				data2$Taxa %<>% gsub(".__", "", .)
			}
			if(length(use_number) > nrow(data1)){
				use_number <- seq_len(nrow(data1))
			}
			# drop the indices beyond the available rows, e.g. 5:20 with 18 taxa; otherwise all-NA rows are created
			use_number <- use_number[use_number <= nrow(data1)]
			data1 %<>% .[use_number, ]
			# reversed levels: after coord_flip the first taxon is shown at the top
			data1$Taxa %<>% factor(., levels = rev(unique(as.character(.))))
			data2 %<>% .[.$Taxa %in% levels(data1$Taxa), ]
			data2$Taxa %<>% factor(., levels = levels(data1$Taxa))
			if(is.null(group_order)){
				data2$Group %<>% as.character %>% as.factor
			}else{
				# reversed here because the legend is reversed again in guide_legend below
				data2$Group %<>% factor(., levels = rev(group_order))
			}

			p1 <- ggplot(data1, aes(x = Taxa, y = Value)) +
				theme_bw() +
				geom_bar(stat = "identity", fill = plot1_bar_color) +
				coord_flip() +
				xlab("") +
				ylab(p1_xtile) +
				theme(panel.border = element_blank(), panel.background=element_rect(fill="white")) +
				theme(panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank()) + 
				theme(axis.title = element_text(size = 17), axis.text.y = element_text(size = axis_text_y, color = "black")) +
				theme(plot.margin = unit(c(.1, 0, .1, 0), "cm"))

			p2 <- ggplot(data2, aes(x=Taxa, y=Mean, color = Group, fill = Group, group = Group)) +
				geom_bar(stat="identity", position = position_dodge(), width = plot2_barwidth)
			if(use_se){
				p2 <- p2 + geom_errorbar(aes(ymin=Mean-SE, ymax=Mean+SE), width=.45, position=position_dodge(plot2_barwidth), color = "black")
			}else{
				p2 <- p2 + geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), width=.45, position=position_dodge(plot2_barwidth), color = "black")
			}
			p2 <- p2 + theme_bw() +
				coord_flip() +
				scale_color_manual(values=color_values) +
				scale_fill_manual(values=color_values) +
				ylab("Relative abundance") +
				theme(legend.position = "right") +
				theme(panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank(), panel.border = element_blank(), 
					panel.background=element_rect(fill="white")) +
				theme(axis.title = element_text(size = 17)) +
				guides(fill=guide_legend(reverse=TRUE, ncol=1), color = "none")
			
			if(only_abund_plot){
				p2 <- p2 + theme(axis.title.y=element_blank(), axis.text.y = element_text(size = axis_text_y, color = "black")) + 
					theme(plot.margin = unit(c(.1, 0, .1, 0), "cm"))
				return(p2)
			}else{
				if(add_significance){
					# the taxa axis of p2 shows the significance asterisks instead of the names;
					# rev() matches the reversed factor levels of Taxa
					Significance <- rev(as.character(cut(data1$pvalue, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), labels=c("***", "**", "*", ""))))
					p2 <- p2 + scale_x_discrete(labels=Significance) +
						theme(axis.title.y=element_blank(), axis.ticks.y = element_blank(), 
						axis.text.y = element_text(color = plot2_sig_color, size = rel(plot2_sig_size))) +
						theme(plot.margin = unit(c(.1, 0, .1, .8), "cm"))
				}else{
					p2 <- p2 + theme(axis.title.y=element_blank(), axis.ticks.y = element_blank(), axis.text.y = element_blank()) +
						theme(plot.margin = unit(c(.1, 0, .1, 0), "cm"))
				}
				return(list(p1 = p1, p2 = p2))
			}
		},
		#' @description
		#' Bar plot for LDA score.
		#'
		#' @param use_number default 1:10; vector, the taxa numbers used in the plot, 1:n.
		#' @param color_values colors for presentation.
		#' @param LDA_score default NULL; numeric value as the threshold, such as 2; when provided, use_number is ignored and the taxa with LDA score larger than the threshold are used.
		#' @param simplify_names default TRUE; whether use the simplified taxonomic name.
		#' @param keep_prefix default TRUE; whether retain the taxonomic prefix.
		#' @param group_order default NULL; a vector to order the legend in plot.
		#' @param axis_text_y default 12; the size for the y axis text.
		#' @param plot_vertical default TRUE; whether use vertical bar plot or horizontal.
		#' @param ... parameters pass to \code{\link{geom_bar}}
		#' @return ggplot.
		#' @examples
		#' \donttest{
		#' t1$plot_lefse_bar(LDA_score = 4)
		#' }
		plot_lefse_bar = function(
			use_number = 1:10,
			color_values = RColorBrewer::brewer.pal(8, "Dark2"),
			LDA_score = NULL,
			simplify_names = TRUE,
			keep_prefix = TRUE,
			group_order = NULL,
			axis_text_y = 12,
			plot_vertical = TRUE,
			...
			){
			# Bar plot of the LDA scores stored in self$res_lefse, colored by the enriched group.
			use_data <- self$res_lefse
			if(simplify_names){
				# keep only the last rank of the lineage
				use_data$Taxa %<>% gsub(".*\\|", "", .)
			}
			if(!keep_prefix){
				use_data$Taxa %<>% gsub(".__", "", .)
			}
			if(is.null(LDA_score)){
				sel_num <- use_number
			}else{
				# the rows were ordered by decreasing LDA when res_lefse was created,
				# so the first sel_num rows are those above the threshold
				sel_num <- sum(use_data$LDA > LDA_score)
				if(sel_num == 0){
					stop("Too large LDA_score provided, no data selected!")
				}
				sel_num <- seq_len(sel_num)
			}
			if(length(sel_num) > nrow(use_data)){
				sel_num <- seq_len(nrow(use_data))
			}
			# drop the indices beyond the available rows; otherwise all-NA rows are created
			sel_num <- sel_num[sel_num <= nrow(use_data)]
			use_data %<>% .[sel_num, ]
			if(is.null(group_order)){
				use_data$Group %<>% as.character %>% as.factor
			}else{
				use_data$Group %<>% factor(., levels = group_order)
			}
			# rearrange orders
			if(length(levels(use_data$Group)) == 2){
				# two groups: the bars of the second group point to the negative direction
				use_data$Taxa %<>% as.character %>% factor(., levels = rev(unique(unlist(lapply(levels(use_data$Group), function(x){
					if(x == levels(use_data$Group)[1]){
						use_data[as.character(use_data$Group) %in% x, ] %>% .[order(.$LDA, decreasing = TRUE), "Taxa"]
					}else{
						use_data[as.character(use_data$Group) %in% x, ] %>% .[order(.$LDA, decreasing = FALSE), "Taxa"]
					}
				})))))
				use_data[use_data$Group == levels(use_data$Group)[2], "LDA"] %<>% {. * -1}
			}else{
				use_data$Taxa %<>% as.character %>% factor(., levels = rev(unique(unlist(lapply(levels(use_data$Group), function(x){
					use_data[as.character(use_data$Group) %in% x, ] %>% .[order(.$LDA, decreasing = TRUE), "Taxa"]
				})))))
			}
			
			p <- ggplot(use_data, aes(x = Taxa, y = LDA, color = Group, fill = Group, group = Group)) +
				geom_bar(stat="identity", position = position_dodge(), ...) +
				theme_bw() +
				scale_color_manual(values=color_values) +
				scale_fill_manual(values=color_values) +
				ylab("LDA score") +
				xlab("") +
				theme(axis.title = element_text(size = 17), axis.text.y = element_text(size = axis_text_y, color = "black")) +
				theme(axis.text.x = element_text(size = 12)) +
				theme(panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
				theme(panel.border = element_blank()) +
				theme(axis.line.x = element_line(color = "grey60", linetype = "solid", lineend = "square"))
				
			if(plot_vertical){
				p <- p + coord_flip()
			}
			p
		},
		#' @description
		#' Plot the cladogram for the LEfSe result, similar to the python version. The code is modified from microbiomeMarker.
		#'
		#' @param color default RColorBrewer::brewer.pal(8, "Dark2"); color used in the plot.
		#' @param use_taxa_num default 200; integer; The taxa number used in the background tree plot; select the taxa according to the mean abundance 
		#' @param filter_taxa default NULL; The mean relative abundance used to filter the taxa with low abundance
		#' @param use_feature_num default NULL; integer; The feature number used in the plot; select the features according to the LDA score
		#' @param group_order default NULL; a vector to order the legend in plot.
		#' @param clade_label_level default 4; the taxonomic level for marking the label with letters, root is the largest
		#' @param select_show_labels default NULL; character vector; The features to show in the plot with full label names, not the letters
		#' @param only_select_show default FALSE; whether only use the selected features in the parameter select_show_labels
		#' @param sep default "|"; the separator character in the taxonomic information
		#' @param branch_size default 0.2; numeric, size of branch
		#' @param alpha default 0.2; shading of the color
		#' @param clade_label_size default 2; basic size for the clade label; please also see clade_label_size_add and clade_label_size_log
		#' @param clade_label_size_add default 5; added basic size for the clade label; see the formula in clade_label_size_log parameter.
		#' @param clade_label_size_log default exp(1); the base of log function for added size of the clade label; the size formula: 
		#'   clade_label_size + log(clade_label_level + clade_label_size_add, base = clade_label_size_log); 
		#'   so use clade_label_size_log, clade_label_size_add and clade_label_size
		#'   can totally control the label size for different taxonomic levels.
		#' @param node_size_scale default 1; scale for the node size
		#' @param node_size_offset default 1; offset for the node size
		#' @param annotation_shape default 22; shape used in the annotation legend
		#' @param annotation_shape_size default 5; size used in the annotation legend
		#' @return ggplot.
		#' @examples
		#' \donttest{
		#' t1$plot_lefse_cladogram(use_taxa_num = 100, use_feature_num = 30, select_show_labels = NULL)
		#' }
		plot_lefse_cladogram = function(
			color = RColorBrewer::brewer.pal(8, "Dark2"),
			use_taxa_num = 200,
			filter_taxa = NULL,
			use_feature_num = NULL,
			group_order = NULL,
			clade_label_level = 4,
			select_show_labels = NULL,
			only_select_show = FALSE,
			sep = "|",
			branch_size = 0.2,
			alpha = 0.2,
			clade_label_size = 2,
			clade_label_size_add = 5,
			clade_label_size_log = exp(1),
			node_size_scale = 1,
			node_size_offset = 1,
			annotation_shape = 22,
			annotation_shape_size = 5
			){
			# Draw a circular cladogram: the background tree is built from the row names of
			# self$abund_table (lineages split by `sep`), and the clades listed in
			# self$res_lefse are highlighted, labelled and added to the legends.
			# Returns the ggtree/ggplot object.
			abund_table <- self$abund_table
			marker_table <- self$res_lefse %>% dropallfactors

			if(!is.null(use_feature_num)){
				# clamp to the available rows; indexing past the end would append all-NA marker rows
				marker_table <- marker_table[seq_len(min(use_feature_num, nrow(marker_table))), ]
			}
			if(only_select_show){
				marker_table %<>% .[.$Taxa %in% select_show_labels, ]
			}
			color <- color[1:length(unique(marker_table$Group))]

			# filter the taxa with unidentified classification or with space, in case of the unexpected error in the following operations
			# NOTE(review): the patterns below hard-code "|" as the separator, whatever `sep` is
			abund_table %<>% {.[!grepl("\\|.__\\|", rownames(.)), ]} %>%
				{.[!grepl("\\s", rownames(.)), ]} %>%
				# also filter uncleared classification to make it in line with the lefse above
				{.[!grepl("Incertae_sedis|unculture", rownames(.)), ]}

			if(!is.null(use_taxa_num)){
				# keep the most abundant taxa, but never ask for more taxa than exist:
				# names(...)[1:n] past the end gives NA names and therefore all-NA rows
				taxa_mean <- sort(apply(abund_table, 1, mean), decreasing = TRUE)
				top_taxa <- names(taxa_mean)[seq_len(min(use_taxa_num, length(taxa_mean)))]
				abund_table <- abund_table[top_taxa, ]
			}
			if(!is.null(filter_taxa)){
				abund_table %<>% .[apply(., 1, mean) > (self$lefse_norm * filter_taxa), ]
			}
			abund_table %<>% .[sort(rownames(.)), ]

			# one row per lineage; abundance is rescaled so that the most abundant taxon is 100
			tree_table <- data.frame(taxa = row.names(abund_table), abd = rowMeans(abund_table), stringsAsFactors = FALSE) %>%
				dplyr::mutate(taxa =  paste("r__Root", .data$taxa, sep = sep), abd = .data$abd/max(.data$abd)*100)
			taxa_split <- strsplit(tree_table$taxa, split = sep, fixed = TRUE)
			# the node name is the last rank of each lineage
			nodes <- purrr::map_chr(taxa_split, utils::tail, n = 1)

			# check whether some nodes duplicated from bad classification
			if(any(duplicated(nodes))){
				del <- nodes %>% .[duplicated(.)] %>% unique
				for(i in del){
					# drop every lineage that contains the ambiguous node name
					tree_table %<>% .[!grepl(paste0("\\|", i, "($|\\|)"), .$taxa), ]
				}
				taxa_split <- strsplit(tree_table$taxa, split = sep, fixed = TRUE)
				nodes <- purrr::map_chr(taxa_split, utils::tail, n = 1)
			}

			# add root node
			nodes %<>% c("r__Root", .)
			# levels used for extend of clade label
			label_levels <- purrr::map_chr(nodes, ~ gsub("__.*$", "", .x)) %>%
				factor(levels = rev(unlist(lapply(taxa_split, function(x) gsub("(.)__.*", "\\1", x))) %>% .[!duplicated(.)]))

			# root must be a parent node
			nodes_parent <- purrr::map_chr(taxa_split, ~ .x[length(.x) - 1]) %>%
				c("root", .)

			## add index for nodes: tips are numbered first, internal nodes afterwards
			is_tip <- !nodes %in% nodes_parent
			index <- vector("integer", length(is_tip))
			index[is_tip] <- 1:sum(is_tip)
			index[!is_tip] <- (sum(is_tip)+1):length(is_tip)

			edges <- cbind(parent = index[match(nodes_parent, nodes)], child = index)
			# the root has no parent, so its edge row is NA and is removed
			edges <- edges[!is.na(edges[, 1]), ]
			# not label the tips
			node_label <- nodes[!is_tip]
			phylo <- structure(list(
				edge = edges, 
				node.label = node_label, 
				tip.label = nodes[is_tip], 
				edge.length = rep(1, nrow(edges)), 
				Nnode = length(node_label)
				), class = "phylo")
			mapping <- data.frame(
				node = index, 
				abd = c(100, tree_table$abd),
				node_label = nodes, 
				stringsAsFactors = FALSE)
			mapping$node_class <- label_levels
			tree <- tidytree::treedata(phylo = phylo, data = tibble::as_tibble(mapping))
			# honour the documented branch_size argument (previously a hard-coded 0.2 was used)
			tree <- ggtree::ggtree(tree, size = branch_size, layout = 'circular')
			
			# color legend order settings
			if(is.null(group_order)){
				color_groups <- marker_table$Group %>% as.character %>% as.factor %>% levels
			}else{
				color_groups <- group_order
			}
			annotation <- private$generate_cladogram_annotation(marker_table, tree = tree, color = color, color_groups = color_groups)
			# background highlight
			annotation_info <- dplyr::left_join(annotation, tree$data, by = c("node" = "label")) %>%
				dplyr::mutate(label = .data$node, id = .data$node.y, level = as.numeric(.data$node_class))
			hilight_para <- dplyr::transmute(
				annotation_info,
				node = .data$id,
				fill = .data$color,
				alpha = alpha,
				extend = private$get_offset(.data$level)
			)
			hilights_g <- purrr::pmap(hilight_para, ggtree::geom_hilight)
			tree <- purrr::reduce(hilights_g, `+`, .init = tree)

			# highlight legend
			hilights_df <- dplyr::distinct(annotation_info, .data$enrich_group, .data$color)
			hilights_df$x <- 0
			hilights_df$y <- 1
			# resort the table used for the legend color and text
			hilights_df %<>% `row.names<-`(.$enrich_group) %>%
				.[color_groups, ]
			# make sure the right order in legend
			hilights_df$enrich_group %<>% factor(., levels = color_groups)

			# add legend: a zero-area rectangle that only exists to create the fill legend
			tree <- tree + 
				geom_rect(aes_(xmin = ~x, xmax = ~x, ymax = ~y, ymin = ~y, fill = ~enrich_group), data = hilights_df, inherit.aes = FALSE) +
				guides(fill = guide_legend(title = NULL, order = 1, override.aes = list(fill = hilights_df$color)))

			# set nodes color and size
			nodes_colors <- rep("white", nrow(tree$data))
			nodes_colors[annotation_info$id] <- annotation_info$color
			node_size <- node_size_scale*log(tree$data$abd) + node_size_offset
			tree$data$node_size <- node_size
			tree <- tree + ggtree::geom_point2(aes(size = I(node_size)), fill = nodes_colors, shape = 21)

			## add clade labels
			clade_label <- dplyr::transmute(
				annotation_info,
				node = .data$id,
				offset = private$get_offset(.data$level)-0.4,
				offset.text = 0,
				angle = purrr::map_dbl(.data$id, private$get_angle, tree = tree),
				label = .data$label,
				fontsize = clade_label_size + log(.data$level + clade_label_size_add, base = clade_label_size_log),
				barsize = 0,
				extend = 0.2,
				hjust = 0.5,
				level = .data$level
			) %>% dplyr::arrange(desc(.data$level))

			# labels on the first half of the circle get a text offset and are rotated the other way
			clade_label$offset.text <- unlist(lapply(seq_len(nrow(clade_label)), function(x){
				if(clade_label$angle[x] < 180){ 0.2 }else{ 0 }}))
			clade_label$angle <- unlist(lapply(clade_label$angle, function(x){
				if(x < 180){ x - 90 }else{ x + 90 }}))
			clade_label_new <- clade_label

			# add letters label to replace long taxonomic label
			if(is.null(select_show_labels)){
				# outer circle --> larger level; label smaller levels, i.e. finer taxonomy
				ind <- clade_label$level < clade_label_level	
			}else{
				ind <- ! clade_label$label %in% select_show_labels
			}
			ind_num <- sum(ind)

			if(ind_num > 0){
				if(ind_num < 27){
					use_letters <- letters
				}else{
					# 325 two-letter combinations are available
					if(ind_num < 326){
						use_letters <- apply(combn(letters, 2), 2, function(x){paste0(x, collapse = "")})
					}else{
						stop("Too much features to be labelled with letters, consider to use use_feature_num parameter to reduce the number!")
					}
				}
				clade_label_new$label_legend <- clade_label_new$label_show <- clade_label_new$label_raw <- clade_label_new$label
				clade_label_new$label_show[ind] <- use_letters[1:ind_num]
				clade_label_new$label_legend[ind] <- paste0(clade_label_new$label_show[ind], ": ", clade_label_new$label[ind])
				clade_label_new$label <- clade_label_new$label_show
				# delete redundant columns to avoid warnings
				clade_label <- clade_label_new %>% .[, which(! colnames(.) %in% c("label_raw", "label_show", "label_legend", "level"))]
			}

			clade_label_g <- purrr::pmap(clade_label, ggtree::geom_cladelabel)
			tree <- purrr::reduce(clade_label_g, `+`, .init = tree)

			# if letters are used, add guide labels
			if(ind_num > 0){
				guide_label <- clade_label_new[ind, ] %>%
					dplyr::mutate(color = annotation_info$color[match(.data$label_raw, annotation_info$label)])
				# invisible points whose only purpose is to create the "letter: taxon" legend
				tree <- tree + 
					geom_point(data = guide_label, inherit.aes = FALSE, aes_(x = 0, y = 0, shape = ~label_legend), size = 0, stroke = 0) +
						scale_shape_manual(values = rep(annotation_shape, nrow(guide_label))) +
						guides(shape = guide_legend(override.aes = list(
							size = annotation_shape_size, shape = annotation_shape, fill = guide_label$color)))
			}
			tree <- tree + theme(legend.position = "right", legend.title = element_blank())
			tree

		},
		#' @description
		#' Bar plot for metastat.
		#'
		#' @param use_number default 1:10; vector, the taxa numbers used in the plot, 1:n.
		#' @param color_values colors for presentation.
		#' @param qvalue default .05; numeric value as the threshold of q value.
		#' @param choose_group default 1; which column in res_metastat_group_matrix will be used.
		#' @return ggplot.
		#' @examples
		#' \donttest{
		#' t1 <- trans_diff$new(dataset = dataset, method = "metastat", group = "Group")
		#' t1$plot_metastat(use_number = 1:10, qvalue = 0.05, choose_group = 1)
		#' }
		plot_metastat = function(use_number = 1:10, color_values = RColorBrewer::brewer.pal(8, "Dark2"), qvalue = 0.05, choose_group = 1
			){
			# Dodged bar plot (mean with standard-error bars) of the significant taxa of one
			# metastat comparison; the comparison is the column `choose_group` of
			# self$res_metastat_group_matrix. Returns the ggplot object.
			group_char <- self$res_metastat_group_matrix[, choose_group]
			# the comparison is stored in the first column of the result as "<group1> vs <group2>"
			use_group <- paste0(as.character(group_char), collapse = " vs ")

			metastat_res <- self$res_metastat
			keep_rows <- metastat_res$qvalue < qvalue & metastat_res[, 1] == use_group
			metastat_res <- metastat_res[keep_rows, ]
			# keep only the name at the analysed rank, i.e. the part after "<first letter of the rank>__"
			rank_prefix <- tolower(substr(self$metastat_taxa_level, 1, 1))
			metastat_res[, 2] <- gsub(paste0(".*", rank_prefix, "__(.*)$"), "\\1", metastat_res[, 2])
			metastat_res <- metastat_res[metastat_res[, 2] != "", ]

			if(nrow(metastat_res) > length(use_number)){
				metastat_res <- metastat_res[use_number, ]
			}
			taxa_count <- nrow(metastat_res)
			# long format: all rows of the first group, then all rows of the second group
			plot_data <- data.frame(
				taxa = rep(metastat_res[, 2], 2),
				Mean = c(metastat_res[, 3], metastat_res[, 6]),
				SE = c(metastat_res[, 5], metastat_res[, 8]),
				Group = rep(group_char, times = 1, each = taxa_count)
			)
			# keep the taxa in table order on the x axis
			plot_data$taxa <- factor(plot_data$taxa, levels = unique(plot_data$taxa))

			ggplot(plot_data, aes(x=taxa, y=Mean, color = Group, fill = Group, group = Group)) +
				geom_bar(stat="identity", position = position_dodge()) +
				geom_errorbar(aes(ymin=Mean-SE, ymax=Mean+SE), width=.45, position=position_dodge(.9), color = "black") +
				theme_classic() +
				scale_color_manual(values=color_values) +
				scale_fill_manual(values=color_values) +
				ylab("Relative abundance") +
				theme(
					axis.text.x = element_text(angle = 40, colour = "black", vjust = 1, hjust = 1, size = 9),
					legend.position = "top",
					axis.title = element_text(size = 15)
				) +
				xlab(self$metastat_taxa_level)
		},
		#' @description
		#' Print the trans_diff object.
		print = function() {
			# Print a header followed by one line per result that has already been stored
			# in the object; returns the object invisibly.
			cat("trans_diff class:\n")
			# c() drops the NULL produced by every `if` whose condition is FALSE
			finished <- c(
				if(!is.null(self$res_rf)) "Randomeforest has been calculated \n",
				if(!is.null(self$res_lefse)) "Lefse has been calculated \n",
				if(!is.null(self$res_metastat)) "Metastat has been calculated \n"
			)
			for(status_line in finished){
				cat(status_line)
			}
			invisible(self)
		}
		),
	private = list(
		# group test in lefse or rf
		test_mark = function(dataframe, group, nonpara = TRUE, para = "anova", min_num_nonpara = 1){
			# Group test for a single feature.
			#   dataframe: one feature with the samples in columns; it is transposed and the first
			#     resulting column is tested.
			#   group: grouping of the samples, in the same order as the samples.
			#   nonpara: TRUE -> Wilcoxon rank sum test (exactly two groups) or Kruskal-Wallis test;
			#     FALSE -> parametric test selected by `para` (only "anova" is implemented).
			#   min_num_nonpara: minimum number of samples every group must have for the
			#     nonparametric test; otherwise list(p_value = NA, med = NA) is returned.
			# Returns list(p_value, med) for the nonparametric test (med = per-group medians as a
			# one-column data.frame) or list(p_value) for anova.
			d1 <- as.data.frame(t(dataframe))
			# check normality for parametric test
			if(nonpara == FALSE){
				num_vals <- as.numeric(d1[,1])
				unique_num <- length(unique(num_vals[!is.na(num_vals)]))
				if(unique_num > 3 && unique_num < 5000){
					# NOTE(review): the Shapiro-Wilk p value is computed but never changes the
					# chosen test; the previous code only re-assigned nonpara to FALSE here.
					d1.shapiro <- shapiro.test(num_vals)$p.value
				}else{
					message("It was impossible to verify the normality of the taxa ", colnames(d1)[1], " !")
				}
			}
			if(nonpara == TRUE){
				# compare every group size with the threshold; `any(table(...)) < n` compared a
				# single TRUE with n, so the guard could never trigger
				if(any(table(as.character(group)) < min_num_nonpara)){
					list(p_value = NA, med = NA)
				}else{
					abund <- d1[,1]
					if(length(unique(as.character(group))) == 2){
						# formula interface gives the two-sample rank sum test; wilcox.test(x, g = group)
						# silently ignores `g` and runs a one-sample signed rank test on x
						test_group <- as.character(group)
						res1 <- wilcox.test(abund ~ test_group)
					}else{
						res1 <- kruskal.test(abund, g = group)
					}
					# a NaN p value is reported as non-significant
					if(is.nan(res1$p.value)){
						res1$p.value <- 1
					}
					med <- as.data.frame(tapply(abund, group, median))
					colnames(med) <- colnames(d1)
					list(p_value = res1$p.value, med = med)
				}
			}else{
				# Currently, anova is not used in the whole class
				if(para == "anova"){
					colnames(d1)[1] <- "Abundance"
					d2 <- cbind.data.frame(d1, Group = group)
					res1 <- aov(Abundance ~ Group, d2)
					# the 9th element of the unlisted summary is the Pr(>F) of the Group term
					pvalue <- as.numeric(unlist(summary(res1))[9])
					list(p_value = pvalue)
				}
			}
		},
		# generate the cladogram annotation table
		generate_cladogram_annotation = function(marker_table, tree, color, color_groups, sep = "|") {
			# Build the annotation table of the cladogram: one row per marker with the node
			# label, the colour of its group and the group itself. Markers whose label is not
			# a node label of `tree` are dropped.
			# the node label is the last element of each `sep`-separated lineage
			lineage_parts <- strsplit(marker_table$Taxa, split = sep, fixed = TRUE)
			label <- purrr::map_chr(lineage_parts, utils::tail, n = 1)
			# start from the group names and swap in the colour at the same position as the group in color_groups
			plot_color <- as.character(marker_table$Group)
			for(group_index in seq_along(color_groups)){
				is_group <- plot_color == color_groups[group_index]
				plot_color[is_group] <- color[group_index]
			}
			annotation <- data.frame(
				node = label,
				color = plot_color,
				enrich_group = marker_table$Group,
				stringsAsFactors = FALSE
			)
			# filter the feature with bad classification
			annotation[label %in% tree$data$label, ]
		},
		get_angle = function(tree, node){
			# Angle used for a clade label: the midpoint between the smallest and the largest
			# `angle` value among `node` and all of its offspring in tree$data.
			if (length(node) != 1) {
				stop("The length of `node` must be 1")
			}
			tree_data <- tree$data
			clade_nodes <- c(tidytree::offspring(tree_data, node)$node, node)
			clade_rows <- tree_data[match(clade_nodes, tree_data$node), ]
			angle_range <- range(clade_rows$angle)
			mean(angle_range)
		},
		get_offset = function(x) {
			# Offset as a quadratic function of the (numeric) level x; vectorised over x.
			scaled_level <- x*0.2 + 0.2
			scaled_level^2
		},
		# metastat input
		calculate_metastat = function(inputdata, g, pflag = FALSE, threshold = NULL, B = NULL){
			# Convert the abundance table into the list expected by the metastat routine and
			# run the differential abundance test on it; all other arguments are passed through.
			private$detect_differentially_abundant_features(
				jobj = private$load_frequency_matrix(input = inputdata),
				g = g,
				pflag = pflag,
				threshold = threshold,
				B = B
			)
		},
		#*****************************************************************************************************
		#Modified from raw metastat code
		#load up the frequency matrix from a file
		#*****************************************************************************************************
		# Note sep
		load_frequency_matrix = function(input){
			# Convert an abundance table (features in rows, samples in columns) into the list
			# used by the metastat code:
			#   matrix: numeric array, features x samples, without dimnames
			#   taxa:   character array with the row names of `input`
			# Zero rows or zero columns give empty arrays instead of running `1:0` loops.
			n_taxa <- nrow(input)
			n_subjects <- ncol(input)
			# load taxa
			taxa <- array(as.character(rownames(input)), dim = c(n_taxa))
			# load counts one column at a time rather than cell by cell, which is very slow on a data.frame
			counts <- array(0, dim = c(n_taxa, n_subjects))
			for(j in seq_len(n_subjects)){
				column_values <- if(is.data.frame(input)) input[[j]] else input[, j]
				counts[, j] <- as.numeric(column_values)
			}
			list(matrix = counts, taxa = taxa)
		},
		#  Modified from metastat raw codes
		# http://metastats.cbcb.umd.edu/detect_DA_features.r
		#*****************************************************************************************************
		#  Author: james robert white, whitej@umd.edu, Center for Bioinformatics and Computational Biology.
		#  University of Maryland - College Park, MD 20740
		#
		#  This software is designed to identify differentially abundant features between two groups
		#  Input is a matrix of frequency data. Several thresholding options are available.
		#  See documentation for details.
		#*****************************************************************************************************
		#*****************************************************************************************************
		#  detect_differentially_abundant_features:
		#  the major function - inputs an R object "jobj" containing a list of feature names and the 
		#  corresponding frequency matrix, the argument g is the first column of the second group. 
		#  
		#  -> set the pflag to be TRUE or FALSE to threshold by p or q values, respectively
		#  -> threshold is the significance level to reject hypotheses by.
		#  -> B is the number of bootstrapping permutations to use in estimating the null t-stat distribution.
		#*****************************************************************************************************
		#*****************************************************************************************************
		detect_differentially_abundant_features = function(jobj, g, pflag = NULL, threshold = NULL, B = NULL){
			# Metastat test of every feature between two groups of samples.
			#   jobj: list with `matrix` (feature x sample abundance matrix) and `taxa` (feature names).
			#   g: index of the first column that belongs to the second group.
			#   B: number of permutations for the null distribution of the t statistic (default 1000).
			#   pflag, threshold: kept for interface compatibility. They only drove a thresholded
			#     table that was built and then discarded, so they do not affect the returned value.
			# Returns a character matrix with one row per feature and the columns taxa, mean,
			# variance and std.err of both groups, pvalue and qvalue, ordered by increasing p value.
			if (is.null(B)){
				B <- 1000
			}
			#********************************************************************************
			# ************************ INITIALIZE PARAMETERS ********************************
			#********************************************************************************
			Fmatrix <- jobj$matrix                      # the feature abundance matrix
			taxa <- jobj$taxa                           # the taxa/(feature) labels
			nrows <- nrow(Fmatrix)
			ncols <- ncol(Fmatrix)
			C1 <- array(0, dim = c(nrows, 3))           # statistic profiles for class 1 and class 2
			C2 <- array(0, dim = c(nrows, 3))           # mean[1], variance[2], standard error[3]
			pvalues <- array(0, dim = c(nrows, 1))      # place to store pvalues
			#*************************************
			#  convert to proportions: every column is divided by its total
			#*************************************
			totals <- vapply(seq_len(ncols), function(j) sum(Fmatrix[, j]), numeric(1))
			Pmatrix <- sweep(Fmatrix, 2, totals, "/")
			#********************************************************************************
			# ************************** STATISTICAL TESTING ********************************
			#********************************************************************************
			if (ncols == 2){  # then we have a two sample comparison
				#************************************************************
				#  generate p values using chisquared or fisher's exact test
				#************************************************************
				for (i in seq_len(nrows)){           # for each feature
					f11 <- sum(Fmatrix[i,1])
					f12 <- sum(Fmatrix[i,2])
					f21 <- totals[1] - f11
					f22 <- totals[2] - f12
					C1[i,1] <- f11/totals[1]                       # proportion estimate
					C1[i,2] <- (C1[i,1]*(1-C1[i,1]))/(totals[1]-1) # sample variance
					C1[i,3] <- sqrt(C1[i,2])                       # sample standard error
					C2[i,1] <- f12/totals[2]
					C2[i,2] <- (C2[i,1]*(1-C2[i,1]))/(totals[2]-1)
					C2[i,3] <- sqrt(C2[i,2])
					#  f11  f12
					#  f21  f22  <- contingency table format (filled column by column)
					contingencytable <- matrix(c(f11, f21, f12, f22), nrow = 2, ncol = 2)
					if (f11 > 20 && f22 > 20){
						pvalues[i] <- stats::chisq.test(contingencytable)$p.value
					}else{
						pvalues[i] <- stats::fisher.test(contingencytable, workspace = 8e6, alternative = "two.sided", conf.int = FALSE)$p.value
					}
				}
				#*************************************
				#  calculate q values from p values
				#*************************************
				qvalues <- private$calc_qvalues(pvalues)
			}else{ # we have multiple subjects per population
				# explicit column sets of the two groups (the previous `1:g-1` is `(1:g)-1`,
				# which only worked because a zero index is ignored)
				group1_cols <- seq_len(g - 1)
				group2_cols <- g:ncols
				n_group1 <- g - 1
				n_group2 <- ncols - g + 1
				#*************************************
				#  generate statistics mean, var, stderr
				#*************************************
				for (i in seq_len(nrows)){ # for each taxa
					C1[i,1] <- mean(Pmatrix[i, group1_cols])
					C1[i,2] <- stats::var(Pmatrix[i, group1_cols])   # variance
					C1[i,3] <- C1[i,2]/n_group1                      # std err^2 (will change to std err at end)

					C2[i,1] <- mean(Pmatrix[i, group2_cols])
					C2[i,2] <- stats::var(Pmatrix[i, group2_cols])   # variance
					C2[i,3] <- C2[i,2]/n_group2                      # std err^2 (will change to std err at end)
				}
				#*************************************
				#  two sample t-statistics
				#*************************************
				T_statistics <- array((C1[,1] - C2[,1])/sqrt(C1[,3] + C2[,3]), dim = c(nrows, 1))
				#*************************************
				# generate initial permuted p-values
				#*************************************
				pvalues <- private$permuted_pvalues(Pmatrix, T_statistics, B, g, Fmatrix)
				#*************************************
				#  generate p values for sparse data
				#  using fisher's exact test
				#*************************************
				for (i in seq_len(nrows)){                   # for each taxa
					f11 <- sum(Fmatrix[i, group1_cols])
					f12 <- sum(Fmatrix[i, group2_cols])
					# sparse feature: fewer counts than samples in both groups
					if (f11 < n_group1 && f12 < n_group2){
						f21 <- sum(totals[group1_cols]) - f11
						f22 <- sum(totals[group2_cols]) - f12
						contingencytable <- matrix(c(f11, f21, f12, f22), nrow = 2, ncol = 2)
						pvalues[i] <- stats::fisher.test(contingencytable, workspace = 8e6, alternative = "two.sided", conf.int = FALSE)$p.value
					}
				}
				#*************************************
				#  calculate q values from p values
				#*************************************
				qvalues <- private$calc_qvalues(pvalues)
				#*************************************
				#  convert stderr^2 to std error
				#*************************************
				C1[,3] <- sqrt(C1[,3])
				C2[,3] <- sqrt(C2[,3])
			}
			#*************************************
			#  assemble the result table
			#*************************************
			# the feature names make this a character matrix, so every number is stored as text
			Total_matrix <- matrix(
				c(as.character(taxa), C1, C2, as.vector(pvalues), as.vector(qvalues)),
				nrow = nrows,
				ncol = 9,
				dimnames = list(NULL, c("taxa", "mean(group1)", "variance(group1)", "std.err(group1)", "mean(group2)", "variance(group2)", "std.err(group2)", "pvalue", "qvalue"))
			)
			# order by the numeric p values: ordering the text column puts e.g. "1e-04" after "0.5";
			# drop = FALSE keeps the matrix shape when there is a single feature
			Total_matrix[order(as.numeric(pvalues), decreasing = FALSE), , drop = FALSE]
		},
		#*****************************************************************************************************
		# takes a matrix, a permutation vector, and a group division g.
		# returns a set of ts based on the permutation.
		#*****************************************************************************************************
		permute_and_calc_ts = function(Imatrix, y, g){
			# Reorder the columns of Imatrix according to the permutation vector y and
			# return the two sample t statistics of the reordered matrix for the split at g.
			column_order <- y[1:length(y)]
			# no drop = FALSE: a one-row matrix becomes a plain vector, which calc_twosample_ts handles separately
			permuted <- Imatrix[, column_order]
			return (private$calc_twosample_ts(permuted, g, nrow(Imatrix), ncol(Imatrix)))
		},
		#*****************************************************************************************************
		#  function to calculate qvalues.
		#  takes an unordered set of pvalues corresponding the rows of the matrix
		#*****************************************************************************************************
		calc_qvalues = function(pvalues){
			# Calculate q values from an unordered set of p values.
			# pi0 (the estimated proportion of true null hypotheses) is the value at
			# lambda = 0.95 of a smoothing spline (df = 3) fitted to pi0_hat(lambda);
			# the q values are then made monotone from the largest p value downwards.
			# Returns an array of q values in the order of `pvalues`.
			nrows <- length(pvalues)
			qvalues <- array(0, dim = c(nrows))
			# nothing to adjust; also avoids fitting a spline to undefined pi0_hat values
			if (nrows == 0){
				return (qvalues)
			}
			# create lambda vector
			lambdas <- seq(0, 0.95, 0.01)
			# pi0_hat(lambda): share of p values above lambda, rescaled by the width of (lambda, 1]
			pi0_hat <- vapply(lambdas, function(lambda){
				sum(pvalues > lambda)/(nrows*(1 - lambda))
			}, numeric(1))
			f <- unclass(stats::smooth.spline(lambdas, pi0_hat, df = 3))
			pi0 <- f$y[length(lambdas)]   # this is the essential pi0_hat value
			# order p-values
			ordered_ps <- order(pvalues)
			ordered_qs <- array(0, dim = c(nrows))
			ordered_qs[nrows] <- min(pvalues[ordered_ps[nrows]]*pi0, 1)
			# walk from the second largest p value down to the smallest; rev(seq_len()) is empty for a
			# single p value, whereas (nrows-1):1 ran over 0:1 and read past the end (NA result)
			for (i in rev(seq_len(nrows - 1))){
				p <- pvalues[ordered_ps[i]]
				new <- p*nrows*pi0/i
				ordered_qs[i] <- min(new, ordered_qs[i+1], 1)
			}
			# re-distribute calculated qvalues to appropriate rows
			qvalues[ordered_ps] <- ordered_qs
			return (qvalues)
		},
		##################################################################################
		# metastat code from White et al. (2009) <doi:10.1371/journal.pcbi.1000352>.
		#************************************************************************
		# ************************** SUBROUTINES ********************************
		#*****************************************************************************************************
		#  calculate two-sample t statistics
		#  g is the first column in the matrix representing the second condition
		#*****************************************************************************************************
		calc_twosample_ts = function(Pmatrix, g, nrows, ncols)
		{
			# Two sample t statistic of every row of Pmatrix. The first group is the columns
			# before g, the second group the columns g..ncols. When nrows == 1, Pmatrix is
			# indexed as a plain vector. Returns an nrows x 1 array.
			first_group <- (1:g) - 1   # 0:(g-1); the zero index is ignored when subsetting
			second_group <- g:ncols
			# mean, variance and squared standard error of one group of values
			group_profile <- function(values, group_size){
				center <- mean(values)
				spread <- stats::var(values)
				c(center, spread, spread/group_size)
			}
			# values of one row for the given columns
			if (nrows == 1){
				row_values <- function(i, cols) Pmatrix[cols]
			}else{
				row_values <- function(i, cols) Pmatrix[i, cols]
			}
			C1 <- array(0, dim = c(nrows, 3))
			C2 <- array(0, dim = c(nrows, 3))
			Ts <- array(0, dim = c(nrows, 1))
			for (i in 1:nrows){ # for each taxa
				C1[i, ] <- group_profile(row_values(i, first_group), g - 1)
				C2[i, ] <- group_profile(row_values(i, second_group), ncols - g + 1)
				# difference of the means over the pooled standard error
				Ts[i] <- (C1[i,1] - C2[i,1])/sqrt(C1[i,3] + C2[i,3])
			}
			return (Ts)
		},
		#*****************************************************************************************************
		#  function to calculate permuted pvalues from Storey and Tibshirani(2003)
		#  B is the number of permutation cycles
		#  g is the first column in the matrix of the second condition 
		#*****************************************************************************************************
		permuted_pvalues = function(Imatrix, tstats, B, g, Fmatrix)
		{
			# Permutation p value of every row of Imatrix.
			#   tstats: observed t statistics, one per row.
			#   B: number of column permutations used for the null distribution.
			#   g: first column of the second group.
			#   Fmatrix: abundance matrix used to decide which rows are frequently observed.
			# Returns an array with one p value per row.
			M <- nrow(Imatrix)
			ps <- array(0, dim = c(M)) # to store the pvalues
			if (is.null(M) || M == 0){
				return (ps)
			}
			ncols <- ncol(Fmatrix)
			# null distribution: absolute t statistics of B random column permutations
			# (taxa in rows, permutations in columns)
			permuted_ttests <- array(0, dim = c(M, B))
			for (j in 1:B){
				shuffled <- sample(1:ncol(Imatrix))
				permuted_ttests[, j] <- abs(private$permute_and_calc_ts(Imatrix, shuffled, g))
			}
			small_group <- (g-1) < 8 || (ncols-g+1) < 8
			if (!small_group){
				# enough samples: every taxon is compared with its own permuted statistics
				for (i in 1:M){
					exceed <- sum(permuted_ttests[i,] > abs(tstats[i]))
					ps[i] <- (1/(B+1))*(exceed+1)
				}
				return(ps)
			}
			# fewer than 8 samples in a group: pool the permuted statistics of the
			# frequently observed taxa and compare every taxon with the pool
			is_frequent <- vapply(1:M, function(i){
				if (sum(Fmatrix[i,1:(g-1)]) >= (g-1) || sum(Fmatrix[i,g:ncols]) >= (ncols-g+1)){
					TRUE
				}else{
					FALSE
				}
			}, logical(1))
			pooled_ttests <- permuted_ttests[is_frequent, , drop = FALSE]
			# NOTE(review): the divisor uses the number of pooled taxa plus one, exactly as
			# the original counter did after its fill loop; confirm this is intended
			pool_divisor <- sum(is_frequent) + 1
			for (i in 1:M){
				ps[i] <- (1/(B*pool_divisor))*sum(pooled_ttests > abs(tstats[i]))
			}
			return(ps)
		}
	),
	lock_class = FALSE,
	lock_objects = FALSE
)
