
# Read translation entries from one or more text or .docx files.
#
# Only lines that (after trimming) start with "|" are used. After the leading
# "|" is dropped, each line is parsed as
#
#   sign_name : type : meaning
#
# If a line contains only one ":", the part after it is taken as the meaning
# and the type is left empty; such lines are reported in a warning.
#
# Arguments:
#   file     Character vector of file paths. Paths ending in ".docx" are read
#            with read_docx()/docx_summary(); all others with readLines().
#   mapping  Passed on to as.sign_name(). If NULL, it is read from
#            "extdata/etcsl_mapping.txt" of the installed package "sumer".
#
# Returns:
#   A data.frame with the columns `sign_name`, `type` and `meaning`, one row
#   per translation line whose sign name is non-empty.
read_translated_text <- function(file, mapping = NULL) {

  if (!is.character(file)) stop("file must be a character-vector")
  # An NA path would otherwise fail further down with an unhelpful
  # "missing value where TRUE/FALSE needed" error.
  if (anyNA(file)) stop("file must not contain missing values")

  if (is.null(mapping)) {
    path    <- system.file("extdata", "etcsl_mapping.txt", package = "sumer")
    mapping <- read.csv2(path, sep = ";", na.strings = "")
  }

  # Collect the "|"-prefixed lines of every file. lapply() copes with a
  # zero-length `file` (1:length(file) would iterate over c(1, 0)) and avoids
  # growing a vector inside a loop.
  lines_per_file <- lapply(file, function(path) {
    if (str_ends(path, fixed(".docx"))) {
      doc     <- read_docx(path)
      doc_xml <- doc$doc_obj$get()
      # Bookmark nodes are removed from the in-memory document before it is
      # summarised. NOTE(review): presumably they interfere with
      # docx_summary() -- confirm.
      xml_remove(xml_find_all(doc_xml, "//w:bookmarkStart"))
      xml_remove(xml_find_all(doc_xml, "//w:bookmarkEnd"))
      text <- docx_summary(doc)$text
    } else {
      text <- readLines(path)
    }
    text <- str_trim(text)
    keep <- str_starts(text, fixed("|"))
    # Indexing with a logical NA would inject NA elements, so NA is treated
    # as "does not start with |".
    text[!is.na(keep) & keep]
  })
  x <- as.character(unlist(lines_per_file, use.names = FALSE))

  # Drop the leading "|" and normalise whitespace.
  x <- str_sub(x, 2)
  x <- str_squish(x)

  # Split at the first ":" (sign name vs. rest), then split the rest at its
  # first ":" (type vs. meaning).
  first <- str_split_fixed(x, ":", 2)
  rest  <- str_split_fixed(first[, 2], ":", 2)

  # Without a second ":" the rest is the meaning and the type is empty.
  # Index assignment (instead of ifelse()) keeps both columns character even
  # when there are no lines at all.
  no_type <- rest[, 2] == ""
  type    <- rest[, 1]
  meaning <- rest[, 2]
  type[no_type]    <- ""
  meaning[no_type] <- rest[no_type, 1]

  df <- data.frame(
    sign_name = first[, 1],
    type      = type,
    meaning   = meaning,
    stringsAsFactors = FALSE
  )

  if (any(df$type == "")) {
    untyped <- x[df$type == ""]
    warning("The following translations have missing types:\n",
            paste(untyped, collapse = "\n"),
            call. = FALSE)
  }

  # Keep only the part of the meaning before the first ";" or "|".
  df$meaning <- str_split_fixed(df$meaning, fixed(";"), 2)[, 1]
  df$meaning <- str_split_fixed(df$meaning, fixed("|"), 2)[, 1]
  df$meaning <- str_squish(df$meaning)

  # Normalise the type: remove all spaces and replace the ASCII notation
  # "->" and "x" by the characters U+2192 and U+2612.
  df$type <- str_squish(df$type)
  df$type <- str_replace_all(df$type, " ", "")
  df$type <- str_replace_all(df$type, "->", "\u2192")
  df$type <- str_replace_all(df$type, "x", "\u2612")

  # Keep only the part of the sign name before the first "=".
  df$sign_name <- str_split_fixed(df$sign_name, "=", 2)[, 1]
  df$sign_name <- flatten_cuneiform(df$sign_name)

  df <- df[df$sign_name != "", ]
  # Renumber the rows 1..n. NULL also works for zero rows, whereas
  # 1:nrow(df) would yield c(1, 0) and raise an error.
  rownames(df) <- NULL

  df$sign_name <- as.sign_name(df$sign_name, mapping = mapping)

  return(df)
}
