% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/FeatureExtractor.R
\name{TEFeatureExtractor}
\alias{TEFeatureExtractor}
\title{Feature extractor for reducing the number of dimensions of text embeddings.}
\value{
Objects of this class are used for reducing the number of dimensions of text embeddings created by an object
of class \link{TextEmbeddingModel}.

For training an object of class \link{EmbeddedText} or \link{LargeDataSetForTextEmbeddings} generated by an object of class
\link{TextEmbeddingModel} is necessary. Passing raw texts is not supported.

For prediction an object of class \link{EmbeddedText} or \link{LargeDataSetForTextEmbeddings} is necessary that was generated
with the same \link{TextEmbeddingModel} as during training. Prediction outputs a new object of class \link{EmbeddedText} or
\link{LargeDataSetForTextEmbeddings} which contains a text embedding with a lower number of dimensions.

All models use tied weights for the encoder and decoder layers (except \code{method="lstm"}) and apply the estimation of
orthogonal weights. In addition, training tries to train the model to achieve uncorrelated features.

Objects of class \link{TEFeatureExtractor} are designed to be used with classifiers such as \link{TEClassifierRegular} and
\link{TEClassifierProtoNet}.
}
\description{
Abstract class for auto encoders with 'pytorch'.
}
\seealso{
Other Text Embedding: 
\code{\link{TextEmbeddingModel}}
}
\concept{Text Embedding}
\section{Super class}{
\code{\link[aifeducation:AIFEBaseModel]{aifeducation::AIFEBaseModel}} -> \code{TEFeatureExtractor}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-TEFeatureExtractor-configure}{\code{TEFeatureExtractor$configure()}}
\item \href{#method-TEFeatureExtractor-train}{\code{TEFeatureExtractor$train()}}
\item \href{#method-TEFeatureExtractor-load_from_disk}{\code{TEFeatureExtractor$load_from_disk()}}
\item \href{#method-TEFeatureExtractor-extract_features}{\code{TEFeatureExtractor$extract_features()}}
\item \href{#method-TEFeatureExtractor-extract_features_large}{\code{TEFeatureExtractor$extract_features_large()}}
\item \href{#method-TEFeatureExtractor-is_trained}{\code{TEFeatureExtractor$is_trained()}}
\item \href{#method-TEFeatureExtractor-clone}{\code{TEFeatureExtractor$clone()}}
}
}
\if{html}{\out{
<details><summary>Inherited methods</summary>
<ul>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="check_embedding_model"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-check_embedding_model'><code>aifeducation::AIFEBaseModel$check_embedding_model()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="count_parameter"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-count_parameter'><code>aifeducation::AIFEBaseModel$count_parameter()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_all_fields"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_all_fields'><code>aifeducation::AIFEBaseModel$get_all_fields()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_documentation_license"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_documentation_license'><code>aifeducation::AIFEBaseModel$get_documentation_license()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_ml_framework"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_ml_framework'><code>aifeducation::AIFEBaseModel$get_ml_framework()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_model_description"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_model_description'><code>aifeducation::AIFEBaseModel$get_model_description()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_model_info"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_model_info'><code>aifeducation::AIFEBaseModel$get_model_info()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_model_license"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_model_license'><code>aifeducation::AIFEBaseModel$get_model_license()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_package_versions"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_package_versions'><code>aifeducation::AIFEBaseModel$get_package_versions()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_private"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_private'><code>aifeducation::AIFEBaseModel$get_private()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_publication_info"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_publication_info'><code>aifeducation::AIFEBaseModel$get_publication_info()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_sustainability_data"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_sustainability_data'><code>aifeducation::AIFEBaseModel$get_sustainability_data()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_text_embedding_model"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_text_embedding_model'><code>aifeducation::AIFEBaseModel$get_text_embedding_model()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="get_text_embedding_model_name"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-get_text_embedding_model_name'><code>aifeducation::AIFEBaseModel$get_text_embedding_model_name()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="is_configured"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-is_configured'><code>aifeducation::AIFEBaseModel$is_configured()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="load"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-load'><code>aifeducation::AIFEBaseModel$load()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="save"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-save'><code>aifeducation::AIFEBaseModel$save()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="set_documentation_license"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-set_documentation_license'><code>aifeducation::AIFEBaseModel$set_documentation_license()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="set_model_description"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-set_model_description'><code>aifeducation::AIFEBaseModel$set_model_description()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="set_model_license"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-set_model_license'><code>aifeducation::AIFEBaseModel$set_model_license()</code></a></span></li>
<li><span class="pkg-link" data-pkg="aifeducation" data-topic="AIFEBaseModel" data-id="set_publication_info"><a href='../../aifeducation/html/AIFEBaseModel.html#method-AIFEBaseModel-set_publication_info'><code>aifeducation::AIFEBaseModel$set_publication_info()</code></a></span></li>
</ul>
</details>
}}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-TEFeatureExtractor-configure"></a>}}
\if{latex}{\out{\hypertarget{method-TEFeatureExtractor-configure}{}}}
\subsection{Method \code{configure()}}{
Creating a new instance of this class.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{TEFeatureExtractor$configure(
  ml_framework = "pytorch",
  name = NULL,
  label = NULL,
  text_embeddings = NULL,
  features = 128,
  method = "lstm",
  noise_factor = 0.2,
  optimizer = "adam"
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{ml_framework}}{\code{string} Framework to use for training and inference. Currently only \code{ml_framework="pytorch"}
is supported.}

\item{\code{name}}{\code{string} Name of the new feature extractor. Please refer to common name conventions. Free text can be used
with parameter \code{label}.}

\item{\code{label}}{\code{string} Label for the new feature extractor. Here you can use free text.}

\item{\code{text_embeddings}}{An object of class \link{EmbeddedText} or \link{LargeDataSetForTextEmbeddings}.}

\item{\code{features}}{\code{int} determining the number of dimensions to which the dimension of the text embedding should be
reduced.}

\item{\code{method}}{\code{string} Method to use for the feature extraction. \code{"lstm"} for an extractor based on LSTM-layers or
\code{"dense"} for dense layers.}

\item{\code{noise_factor}}{\code{double} between 0 and a value lower than 1 indicating how much noise should be added for the
training of the feature extractor.}

\item{\code{optimizer}}{\code{string} \code{"adam"} or \code{"rmsprop"}.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
Returns an object of class \link{TEFeatureExtractor} which is ready for training.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-TEFeatureExtractor-train"></a>}}
\if{latex}{\out{\hypertarget{method-TEFeatureExtractor-train}{}}}
\subsection{Method \code{train()}}{
Method for training a neural net.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{TEFeatureExtractor$train(
  data_embeddings,
  data_val_size = 0.25,
  sustain_track = TRUE,
  sustain_iso_code = NULL,
  sustain_region = NULL,
  sustain_interval = 15,
  epochs = 40,
  batch_size = 32,
  dir_checkpoint,
  trace = TRUE,
  ml_trace = 1,
  log_dir = NULL,
  log_write_interval = 10
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{data_embeddings}}{Object of class \link{EmbeddedText} or \link{LargeDataSetForTextEmbeddings}.}

\item{\code{data_val_size}}{\code{double} between 0 and 1, indicating the proportion of cases which should be used for the
validation sample.}

\item{\code{sustain_track}}{\code{bool} If \code{TRUE} energy consumption is tracked during training via the python library
'codecarbon'.}

\item{\code{sustain_iso_code}}{\code{string} ISO code (Alpha-3-Code) for the country. This variable must be set if
sustainability should be tracked. A list can be found on Wikipedia:
\url{https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes}.}

\item{\code{sustain_region}}{Region within a country. Only available for USA and Canada. See the documentation of
'codecarbon' for more information: \url{https://mlco2.github.io/codecarbon/parameters.html}}

\item{\code{sustain_interval}}{\code{int} Interval in seconds for measuring power usage.}

\item{\code{epochs}}{\code{int} Number of training epochs.}

\item{\code{batch_size}}{\code{int} Size of batches.}

\item{\code{dir_checkpoint}}{\code{string} Path to the directory where the checkpoint during training should be saved. If the
directory does not exist, it is created.}

\item{\code{trace}}{\code{bool} \code{TRUE}, if information about the estimation phase should be printed to the console.}

\item{\code{ml_trace}}{\code{int} \code{ml_trace=0} does not print any information about the training process from pytorch on
the console. \code{ml_trace=1} prints a progress bar.}

\item{\code{log_dir}}{\code{string} Path to the directory where the log files should be saved. If no logging is desired set
this argument to \code{NULL}.}

\item{\code{log_write_interval}}{\code{int} Time in seconds determining the interval in which the logger should try to update
the log files. Only relevant if \code{log_dir} is not \code{NULL}.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
Function does not return a value. It changes the object into a trained feature extractor.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-TEFeatureExtractor-load_from_disk"></a>}}
\if{latex}{\out{\hypertarget{method-TEFeatureExtractor-load_from_disk}{}}}
\subsection{Method \code{load_from_disk()}}{
Loads an object from disk and updates the object to the current version of the package.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{TEFeatureExtractor$load_from_disk(dir_path)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{dir_path}}{Path to the directory where the object is stored.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
Method does not return anything. It loads an object from disk.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-TEFeatureExtractor-extract_features"></a>}}
\if{latex}{\out{\hypertarget{method-TEFeatureExtractor-extract_features}{}}}
\subsection{Method \code{extract_features()}}{
Method for extracting features. Applying this method reduces the number of dimensions of the text
embeddings. Please note that this method should only be used if a small number of cases should be compressed
since the data is loaded completely into memory. For a high number of cases please use the method
\code{extract_features_large}.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{TEFeatureExtractor$extract_features(data_embeddings, batch_size)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{data_embeddings}}{Object of class \link{EmbeddedText}, \link{LargeDataSetForTextEmbeddings},
\code{datasets.arrow_dataset.Dataset} or \code{array} containing the text embeddings which should be reduced in their
dimensions.}

\item{\code{batch_size}}{\code{int} batch size.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
Returns an object of class \link{EmbeddedText} containing the compressed embeddings.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-TEFeatureExtractor-extract_features_large"></a>}}
\if{latex}{\out{\hypertarget{method-TEFeatureExtractor-extract_features_large}{}}}
\subsection{Method \code{extract_features_large()}}{
Method for extracting features from a large number of cases. Applying this method reduces the number
of dimensions of the text embeddings.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{TEFeatureExtractor$extract_features_large(
  data_embeddings,
  batch_size,
  trace = FALSE
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{data_embeddings}}{Object of class \link{EmbeddedText} or \link{LargeDataSetForTextEmbeddings} containing the text
embeddings which should be reduced in their dimensions.}

\item{\code{batch_size}}{\code{int} batch size.}

\item{\code{trace}}{\code{bool} If \code{TRUE} information about the progress is printed to the console.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
Returns an object of class \link{LargeDataSetForTextEmbeddings} containing the compressed embeddings.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-TEFeatureExtractor-is_trained"></a>}}
\if{latex}{\out{\hypertarget{method-TEFeatureExtractor-is_trained}{}}}
\subsection{Method \code{is_trained()}}{
Check if the \link{TEFeatureExtractor} is trained.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{TEFeatureExtractor$is_trained()}\if{html}{\out{</div>}}
}

\subsection{Returns}{
Returns \code{TRUE} if the object is trained and \code{FALSE} if not.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-TEFeatureExtractor-clone"></a>}}
\if{latex}{\out{\hypertarget{method-TEFeatureExtractor-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{TEFeatureExtractor$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
