# ----------------------------------------------------------------------

#' Convert ELAN EAF files to a multicastR table (WIP)
#'
#' \code{mc_eaf_to_mcr} converts EAF files produced by the linguistic annotation
#' software ELAN into a multicastR table. The EAF files must have the correct
#' tier structure and names, specifically those used for the Multi-CAST
#' collection: ...
#'
#' Only files whose names end in \code{.eaf} are read. Files that merely
#' contain \code{.eaf} somewhere in their name (e.g. \code{text.eaf.001} or
#' \code{text.eaf.bak}) are skipped.
#'
#' @param readfrom Directory from which to read EAF files. Defaults to the
#'   current working directory.
#' @param recursive Logical. If \code{TRUE}, recurses into subdirectories.
#' @param write Logical. If \code{TRUE}, create an output file in \code{txt}
#'   format.
#' @param writeto If \code{write} is \code{TRUE}, a directory to which to write
#'   output. Defaults to \code{getwd}. Ignored if \code{write} is \code{FALSE}.
#' @param filename A length 1 character vector containing the name of the
#'   written output. If empty, defaults to "multicast_YYMM", where 'YY' are the
#'   last two digits of the current year and 'MM' the current month. Ignored if
#'   \code{write} is \code{FALSE}.
#'
#' @return A \code{\link[data.table]{data.table}} of the form produced by
#'   \code{\link{multicast}}, containing the annotation values of the EAF files
#'   read. The table is returned visibly if \code{write} is \code{FALSE}, and
#'   invisibly (after the file has been written) if \code{write} is
#'   \code{TRUE}.
#'
#' @keywords internal
#' @export
mc_eaf_to_mcr <- function(readfrom = getwd(), recursive = FALSE, write = FALSE,
						  writeto = getwd(), filename = "") {
	# get list of files to read; the pattern is anchored with '$' so that only
	# names ending in ".eaf" match
	filelist <- list.files(path = readfrom, pattern = "\\.eaf$",
						   full.names = TRUE, recursive = recursive)

	# read each EAF file and convert it to a multicastR table
	message(paste0("Reading ", length(filelist), " files."))
	mcastr <- lapply(filelist, mc_convert_mcr)
	mcastr <- rbindlist(mcastr, fill = TRUE)

	# write table to file, then hand the table back invisibly so that the
	# documented return value holds in both modes
	if (isTRUE(write)) {
		mc_write_mcr(mcastr, writeto, filename)
		return(invisible(mcastr))
	}

	# return table
	mcastr
}

# ----------------------------------------------------------------------

# ----------------------------------------------------------------------

#' Shape input from EAF files
#'
#' Called by \code{\link{mc_eaf_to_mcr}} for each EAF file in
#' the list. While reading the EAFs themselves is handled by
#' \code{\link{mc_read_eaf}}, this function brings the output into
#' the proper shape: the utterance identifier is split on \code{"_"} into
#' \code{corpus}, \code{file}, and \code{uid}; the GRAID gloss is split on the
#' first \code{":"} and the first \code{"."} into \code{gform}, \code{ganim},
#' and \code{gfunc}; missing values are replaced by empty strings.
#'
#' @param eaffile Path to and filename of an EAF file to be converted.
#'
#' @return A \code{\link[data.table]{data.table}} containing the annotation
#'   values of a single EAF file, with the columns \code{corpus}, \code{file},
#'   \code{uid}, \code{word}, \code{gloss}, \code{graid}, \code{gform},
#'   \code{ganim}, \code{gfunc}, \code{refind}, and \code{reflex}.
#'
#' @keywords internal
mc_convert_mcr <- function(eaffile) {
	# Split each element of 'x' on the fixed separator 'sep' into at most 'n'
	# pieces and return a list of exactly 'n' character vectors (one per
	# piece), padded with NA where an element has fewer pieces. Always
	# returning 'n' vectors keeps the number of values equal to the number of
	# target columns, even if no element of 'x' contains 'sep'.
	split_cols <- function(x, sep, n) {
		pieces <- stringi::stri_split_fixed(x, sep, n = n)
		lapply(seq_len(n), function(i) {
			vapply(pieces, function(p) p[i], character(1))
		})
	}

	# convert EAF file
	mcastr <- mc_read_eaf(eaffile)

	# select columns
	mcastr <- mcastr[, c("uttid_val", "gwords_val", "gloss_val", "graid_val",
						 "refind_val", "reflex_val")]

	# split utterance_id into corpus, file, and uid
	mcastr[, c("corpus", "file", "uid") := split_cols(uttid_val, "_", 3L)]

	# split graid into form, animacy, and function
	mcastr[, c("gformanim", "gfunc") := split_cols(graid_val, ":", 2L)]
	mcastr[, c("gform", "ganim") := split_cols(gformanim, ".", 2L)]

	# restore split clause boundaries: glosses containing '#' or '%' that were
	# split at a '.' get their full gloss back as form and no animacy value.
	# The row mask is computed once, before either column is changed, because
	# clearing 'ganim' first would make the condition false for every row.
	# NOTE(review): if a boundary gloss can also carry a function after ':',
	# 'gformanim' may be the intended value for 'gform' here -- confirm.
	is_boundary <- mcastr[, grepl("#|%", graid_val) & !is.na(ganim) & ganim != ""]
	mcastr[is_boundary, gform := graid_val]
	mcastr[is_boundary, ganim := ""]

	# replace NAs with empty strings
	mcastr[is.na(mcastr)] <- ""

	# select columns
	mcastr <- mcastr[, c("corpus", "file", "uid", "gwords_val", "gloss_val",
						 "graid_val", "gform", "ganim", "gfunc", "refind_val", "reflex_val")]

	# rename columns (by name rather than by position, so that the mapping
	# does not depend on the column order chosen above)
	setnames(mcastr,
			 old = c("gwords_val", "gloss_val", "graid_val", "refind_val", "reflex_val"),
			 new = c("word", "gloss", "graid", "refind", "reflex"))

	# give a status update
	message(paste0("Finished converting text '", mcastr[1, corpus], "_", mcastr[1, file], "'."))

	# return finished table
	mcastr
}

# Declare the column names that are referenced as bare symbols inside
# data.table expressions, so that R CMD check does not report them as
# undefined global variables.
if (getRversion() >= "2.15.1") {
	utils::globalVariables(
		c("uttid_val", "graid_val", "gformanim", "gform", "ganim", "corpus")
	)
}

# ----------------------------------------------------------------------

# ----------------------------------------------------------------------

#' Write a multicastR table to file
#'
#' Writes a table generated by \code{\link{mc_convert_mcr}} to a tab-separated
#' \code{txt} file with a header row and without quoting.
#'
#' @param raw A \code{\link[data.table]{data.table}} in multicastR format.
#' @param writeto Directory to which to write the output. Defaults to
#'   \code{getwd}.
#' @param filename A length 1 character vector containing the name of the
#'   written output, without the \code{.txt} extension. If missing or empty,
#'   defaults to "multicast_YYMM", where 'YY' are the last two digits of the
#'   current year and 'MM' the current month.
#'
#' @return \code{NULL}, invisibly. Called for its side effect of writing a
#'   file.
#'
#' @keywords internal
mc_write_mcr <- function(raw, writeto = getwd(), filename = "") {
	# check for missing or empty filename; '||' short-circuits, so 'filename'
	# is not evaluated any further once it is known to be missing
	if (mc_missarg(filename) || length(filename) == 0L || identical(filename, "")) {
		# two-digit year followed by two-digit month, in local time
		filename <- paste0("multicast_", format(Sys.time(), "%y%m"))
	}

	# build the output path with a proper separator between directory and
	# file name ('writeto' usually has no trailing slash, e.g. getwd())
	outfile <- file.path(writeto, paste0(filename, ".txt"))

	# write to file
	fwrite(raw,
	   file = outfile,
	   sep = "\t",
	   quote = FALSE,
	   col.names = TRUE,
	   showProgress = FALSE,
	   verbose = FALSE)

	# write confirmation message to console
	message(paste0("Table written to '", writeto, "' as '", filename, ".txt'."))

	invisible(NULL)
}
