% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/HMM.R
\name{HMM}
\alias{HMM}
\title{Bayesian Homogeneous Markov Model (NHMM)}
\usage{
HMM(y, subseq = NULL, dirprior = NULL, K = 2, iters = 1000,
  burnin = 200, emdist = "normal", nmix = 1, delta = FALSE, W = NULL,
  psipriorm = NULL, psipriorp = NULL, priors = NULL, outdir = NULL,
  ymiss = FALSE, yrep = 0, ypred = 0, Wp = NULL, pT = NULL,
  yhold = NULL)
}
\arguments{
\item{y}{T by J matrix of data (J=1 is sufficient)
-  missing data is denoted with NA}

\item{subseq}{[optional] if y is actually a set of subsequences then give the length of those 
sequences (122 for JJAS) (365 is not it!). Default is subseq=T.}

\item{dirprior}{[optional] prior for Dirichlet prior on the rows of the transition matrix
only for the HMM, must be size KxK. If not supplied a flat prior is used.}

\item{K}{number of states (default=2)}

\item{iters}{number of iterations to keep after burn in  (default=1000)}

\item{burnin}{the number of burn in  (default=200)}

\item{emdist}{emission distribution:  "normal", "poisson", "gamma"
      actual choices are Normal, Poisson,  Gamma,  Exponential,  
or finite mixtures of mixtures or zero inflated version of any of these}

\item{nmix}{[optional] number of mixture components for emdist, default is one (do not include delta)}

\item{delta}{[TRUE/FALSE] TRUE-if we are using a zero inflated distribution
(adds a delta function at zero as the first mixture component)}

\item{W}{[optional] is an A by T by J array of emission input data (A different inputs), 
missing values are not allowed, do not include an intercept term here
The mixture components of emission depend on W.}

\item{psipriorm}{[optional]  default=NULL which is reference prior. Or a [K+A by J matrix] for
the mean of the Normal prior for the beta coefficients.}

\item{psipriorp}{[optional]  default=NULL which is reference prior. Or a [K+A by J matrix] for
the precision prior(1/sig^2) of the Normal prior for the beta coefficients.}

\item{priors}{priors for emission components, each state can have a different prior
        dimension 5 by nmix by K by J  (some of the 5 dimensions are filled with zeros for some distributions)
        Normal(mu,sig2) or reference prior: mu~Normal(a.mu, 1/b.mu^2), sig2~IG(a.sig,b.sig)
           -the first 4 rows are used [a.mu, b.mu, a.sig, b.sig] but the 5th row is not used but must be present. 
           -reference prior: priors=NULL  
           
          Poisson(lambda):  lambda~Gamma(a.lam,b.lam)  
           -the first 2 rows are used [a.lam, b.lam] but the 3rd-5th rows are not used but must be present. 

        Gamma(alpha,beta): alpha~Gamma(a.alpha,b.alpha) beta~Gamma(a.beta,b.beta) 
              -if the second row is supplied as NA then the first row supplies fixed first parameters for the Gamma
                this is the suggested parametrization of the Gamma. Estimating the first parameter of the Gamma distribution
                switches to an MCMC that is not as stable. 
              -to create an Exponential distribution the first row is set to 1 and the second is set to NA
                then the third and fourth rows contain the (a.lam, b.lam) priors for the parameter of the Exponential
                if prior=NULL then these are set to ones. setting these parameters to zeros is not a good idea.
              -a non-informative prior may not work, use a matrix of ones for a lowly informative prior instead of zeros.
              -the 5th row is for the tuning parameter for the MCMC (try 0.5) if a two parameter Gamma is desired.
                A 2 parameter Gamma needs an informative prior and results may be more unstable.
              -default prior=NULL is the Exponential distribution with lowly informative prior of ones}

\item{outdir}{[optional] can output each set of parameters to output files in a directory 
use this with larger dimension data sets or with large number of iterations 
the output will be written line by line and not overburden the memory limit
outdir needs to end with a slash or double slash depending on OS.}

\item{ymiss}{[optional-TRUE/FALSE] if outdir is specified then draws for any missing
data points will be saved to ymiss-J*.txt. There will be one data point for
each sequence of length Tmiss (the amount missing from that sequence).
Each row of the output file will be an iteration of the algorithm after burnin is
removed.}

\item{yrep}{[optional] number (ie. 100,200,or 500) of output replicate data sets 
to print to outdir. The replicates will be the same dimension as y and start
after burn in period. Default is zero. These replicate data sets are 
generated from the same input variables.
- must be shorter than iters}

\item{ypred}{number of predictive sets (ie. 0,100, 200, 500)
        - must be shorter than iters
Predicted chains: will produce ypred set of predictive [pT by J] to print to outdir.
       yrep uses the same inputs to make replications, this uses new inputs
       to make predictions over a different time span (pT)
       Also outputs a set of predictive z values or length (pT by ypred)}

\item{Wp}{predictive set of Ws of length pT [A by pT by J]
-missing valus are not allowed
-ensure that the Wp inputs are in the same order as W}

\item{pT}{the length of the new sequence}

\item{yhold}{[optioinal] a sequence of y observed values [pT by J], held out data that is 
of length ypred that is used to compute  the predictive log score (PLS)
which is a metric like BIC (ie. hold out last 10% of data or do 5-fold CV). 
missing data values are filled in with mean PLS value. 
PLS is the average PLS across sequences.}
}
\value{
my.hmm object
}
\description{
\code{HMM} calculates an HMM for multiple sequences of data. 
The sequences can actually be short sets of equal length sequences (subseq).
A set of input variables (W) can be included to influence the mixture 
proportions of the emission distributions. The HMM follows the general weather
state formulation of Hughes and Guttorp but in a Bayesian fashion. All parameters are sampled
via Gibbs steps (latent variables such that no tuning is needed.) The W variable coefficients
are sampled through an ordered Mulinomial probit (Albert and Chib). The X variable coefficients
are sampled through an unordered Multinomial logit model Polya-Gamma formulation (Polson, Scott, Windle). 
The hidden states are sampled through a blocked Gibbs sampler.
}
\examples{
## Gamma or Exponential
### because we do not supply "priors" as an input it fits an Exponetial distribution
  \dontrun{
data(NHMMdata)
attach(NHMMdata)

my.hmm1=HMM(y=ygamma,  K=3, iters=100, burnin=10, emdist="gamma",
            nmix=3, delta=TRUE)
OBIC(my.hmm1)
zz=Oz(my.hmm1)  #compare with the truth zgamma
qq=OQQ(my.hmm1)
pp=OWcoef(my.hmm1,FALSE)
tt=Oemparams(my.hmm1,FALSE)
 

 ## Normal
 my.hmm2=HMM(y=ynormal, subseq=100,  K=3, iters=100, burnin=10, 
             emdist="normal", nmix=2, delta=FALSE)
 OBIC(my.hmm2)
 
 ## Poisson
 my.hmm3=HMM(y=ypoisson, K=3, iters=100, burnin=10, emdist="poisson", 
             nmix=2, delta=FALSE)
 OBIC(my.hmm3)
 
 ## Predictive estimation - make 15 predictive data sets (new X) and 20 replicate data sets (same X)
 filelocation="C:\\\\Users\\\\iamrandom\\\\Desktop\\\\here\\\\"
 my.hmm5=HMM(y=ygamma,  W=tW, K=3, iters=100, burnin=10, 
           emdist="gamma", nmix=3, delta=TRUE,
         outdir=filelocation, pT=200, yrep=20,  Wp=Wp1, ypred=15)
 OBIC(my.hmm5)
 pp=OWcoef(my.hmm5,filelocation)
 
 ## Gamma with fixed first variables nmix=2
 nmix=2; K=3; J=dim(ygamma)[2]
 prior1=array(1,dim=c(5,nmix,K,J));  prior1[1,1,,]=1;  prior1[1,2,,]=2;  prior1[2,,,]=NA
 my.hmm6=HMM(y=ygamma, priors=prior1, K=3, iters=100, burnin=10, 
             emdist="gamma", nmix=2, delta=TRUE)
 OBIC(my.hmm6)
 Oemparams(my.hmm6)
 
 
 ### my.nhmm7 (K=3)  (yhold is the last 10\% of the data)
 filelocation="C:\\\\Users\\\\iamrandom\\\\Desktop\\\\here\\\\"
 my.hmm7=HMM(y=ygamma[1:1800,],   W=array(tW[,1:1800,],
             dim=c(1,1800,15)), K=3, iters=50, burnin=10, 
            emdist="gamma", nmix=3, delta=TRUE, outdir=filelocation, 
             ymiss=TRUE, yrep=10, pT=200, 
            Wp=array(tW[,1801:2000,],dim=c(1,200,15)), ypred=10, 
             yhold=ygamma[1801:2000,])
 OBIC(my.hmm7)

 # run it with K=3 and then K=1 and compare using both BIC and PLS
 }
}
\keyword{Bayesian}
\keyword{HMM}

