# Copyright 2010-2019 Meik Michalke <meik.michalke@hhu.de>
#
# This file is part of the R package koRpus.
#
# koRpus is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# koRpus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with koRpus.  If not, see <http://www.gnu.org/licenses/>.


#' Import custom corpus data
#'
#' Read data from a custom corpus into a valid object of class \code{\link[koRpus:kRp.corp.freq-class]{kRp.corp.freq}}.
#'
#' The methods should enable you to perform a basic text corpus frequency analysis. That is, not just to
#' import analysis results like LCC files, but to import the corpus material itself. The resulting object
#' is of class \code{\link[koRpus:kRp.corp.freq-class]{kRp.corp.freq}}, so it can be used for frequency analysis by
#' other functions and methods of this package.
#'
#' @param corpus An object of class \code{\link[koRpus:kRp.text-class]{kRp.text}}; the column \code{"token"} of its \code{tokens} slot is used.
#' @param caseSens Logical. If \code{FALSE}, all tokens will be matched in their lower case form.
#' @param log.base A numeric value defining the base of the logarithm used for inverse document frequency (idf). See
#'    \code{\link[base:log]{log}} for details.
#' @param ... Additional options for methods of the generic.
#' @return Depending on \code{as.feature}, either an object of class \code{\link[koRpus:kRp.corp.freq-class]{kRp.corp.freq}},
#'    or an object of class \code{\link[koRpus:kRp.text-class]{kRp.text}} with the added feature \code{corp_freq} containing it.
# @author m.eik michalke \email{meik.michalke@@hhu.de}
#' @keywords corpora
#' @seealso \code{\link[koRpus:kRp.corp.freq-class]{kRp.corp.freq}}
#' @import methods
#' @rdname read.corp.custom-methods
#' @export
#' @examples
#' \dontrun{
#' ru.corp <- read.corp.custom("~/mydata/corpora/russian_corpus/")
#' }

#######################################################################
## if this signature changes, check read_corp_custom_calc() as well! ##
#######################################################################

setGeneric(
  name="read.corp.custom",
  # the body must remain a bare standardGeneric() call to keep this a standard generic;
  # method-specific arguments are accepted through "..."
  def=function(corpus, caseSens=TRUE, log.base=10, ...) standardGeneric("read.corp.custom")
)

#' @param dtm A document term matrix of the \code{corpus} object as generated by \code{\link[koRpus:docTermMatrix]{docTermMatrix}}.
#'    This argument merely exists for cases where you want to re-use an already existing matrix.
#'    By default, it is being created from the \code{corpus} object.
#' @param as.feature Logical, whether the output should be just the analysis results or the input object with
#'    the results added as a feature. Use \code{\link[koRpus:corpusCorpFreq]{corpusCorpFreq}}
#'    to get the results from such an aggregated object.
#' @export
#' @include 01_class_01_kRp.text.R
#' @include koRpus-internal.R
#' @aliases read.corp.custom,kRp.text-method
#' @rdname read.corp.custom-methods
setMethod(
  f="read.corp.custom",
  signature=signature(corpus="kRp.text"),
  definition=function(
    corpus,
    caseSens=TRUE,
    log.base=10,
    dtm=docTermMatrix(
      obj=corpus,
      case.sens=caseSens
    ),
    as.feature=FALSE
  ){
    # the actual frequency analysis is delegated to the internal helper,
    # which receives the (possibly user-supplied) document term matrix
    corp_freq <- read_corp_custom_calc(
      corpus=corpus,
      dtm=dtm,
      caseSens=caseSens,
      log.base=log.base
    )

    # anything but a single TRUE means: hand back the bare analysis results
    if(!isTRUE(as.feature)){
      return(corp_freq)
    }

    # otherwise store the results in the input object and return that instead
    corpusCorpFreq(corpus) <- corp_freq
    return(corpus)
  }
)
