#' featureFuncs
#'
#' This group of functions return vectors of the corresponding features for
#' the samples passed in. Some of the functions use internal datasets to the
#' CNSig package that specify the chromosome lengths and the centromere 
#' positions.
#'
#' @section extractSegsize:
#' This function returns a vector of all the segment sizes for all of the
#' samples.
#'
#' @param segData The samples to extract data from
#' @param chrlen The lengths of the chromosomes from reference genome
#' @param centromeres The positions of the centromeres in reference genome
#' @rdname featureFuncs
extractSegsize <- function(segData){
   # Builds one data frame per named sample in segData (columns ID and value),
   # where value holds each segment's size (end - start) rounded to the nearest
   # million base pairs, then stacks the per-sample frames in sample order.
   perSample <- lapply(names(segData), function(sampleID){
      segs <- segData[[sampleID]]

      # Segment size in base pairs, binned to whole millions of base pairs
      sizeBinned <- round((segs$end - segs$start) / 1000000)

      data.frame(ID = sampleID, value = sizeBinned, stringsAsFactors = FALSE)
   })

   # Binding an empty list gives NULL, which is also the result for no samples
   stacked <- do.call(rbind, perSample)
   rownames(stacked) <- NULL
   stacked
}

#' @section extractBP10MB:
#' This function returns a vector of the average number of breakpoints per
#' 10MB for each chromosome.
#' @rdname featureFuncs
extractBP10MB <- function(segData, chrlen){
   # For every named sample in segData, computes one value per chromosome: the
   # mean number of breakpoints (segment ends, excluding the last segment's
   # end) per 10MB window of that chromosome. Returns a data frame with
   # columns ID (sample name) and value, or NULL when there are no samples.
   #
   # chrlen is indexed by position: column 1 is matched against the chromosome
   # label and column 2 is read as that chromosome's length.
   tenMB <- 10000000 # Window width used for the histogram intervals

   sampleIDs <- names(segData)
   perSample <- vector("list", length(sampleIDs))

   for (s in seq_along(sampleIDs)){
      segments <- segData[[sampleIDs[s]]]

      allBPNum <- c()
      for (chrom in unique(segments$chromosome)){
         breakpoints <- segments[segments$chromosome == chrom, "end"]

         # Get length of current chromosome; fail with a clear message rather
         # than letting seq() choke on a missing or duplicated entry
         currChrlen <- chrlen[chrlen[,1] == chrom, 2]
         if (length(currChrlen) != 1){
            stop("Expected exactly one entry in 'chrlen' for chromosome '",
                 chrom, "', found ", length(currChrlen), call. = FALSE)
         }

         # Round the chromosome length up to the next multiple of 10MB so the
         # windows cover the whole chromosome
         maxInterval <- tenMB * ceiling(currChrlen / tenMB)
         intervals <- seq(0, maxInterval, tenMB)

         # The end of the last segment is dropped before counting
         res <- graphics::hist(breakpoints[-length(breakpoints)],
                               breaks = intervals, plot = FALSE)$counts

         # Mean breakpoint count per 10MB window on this chromosome
         allBPNum <- c(allBPNum, mean(res))
      }

      perSample[[s]] <- data.frame(ID = sampleIDs[s], value = allBPNum,
                                   stringsAsFactors = FALSE)
   }

   # Bind once at the end instead of growing the data frame every iteration
   output <- do.call(rbind, perSample)
   rownames(output) <- NULL
   return(output)
}

#' @section extractOscillations:
#' This function returns a vector of number of oscillation events found on
#' each of the chromosomes.
#' @rdname featureFuncs
extractOscillations <- function(segData,chrlen){
   # For every named sample in segData, counts oscillation events on each
   # chromosome: positions k where segVal[k + 2] equals segVal[k] while
   # segVal[k + 1] differs from segVal[k]. Chromosomes with three or fewer
   # segments contribute 0. Returns a data frame with columns ID and value.
   # The chrlen argument is accepted but not used here.
   sampleIDs <- names(segData)
   perSample <- vector("list", length(sampleIDs))

   for (s in seq_along(sampleIDs)){
      id <- sampleIDs[s]
      segs <- segData[[id]]

      perChrom <- list()
      for (chrom in unique(segs$chromosome)){
         vals <- segs[segs$chromosome == chrom, "segVal"]
         n <- length(vals)

         nOsc <- 0
         if (n > 3){
            # Three aligned windows over the values: k, k + 1 and k + 2
            first <- vals[seq_len(n - 2)]
            second <- vals[2:(n - 1)]
            third <- vals[3:n]

            # Value returns after two steps, and the middle step is not just a
            # repeat of the first (which would mean all three are equal)
            nOsc <- sum(third == first & second != first)
         }

         perChrom[[length(perChrom) + 1]] <- nOsc
      }

      perSample[[s]] <- data.frame(ID = id, value = unlist(perChrom),
                                   stringsAsFactors = FALSE)
   }

   stacked <- do.call(rbind, perSample)
   rownames(stacked) <- NULL
   stacked
}

#' @section extractBPChrArm:
#' This function returns a vector of number of total breakpoints per chromosome
#' arm.
#' @rdname featureFuncs
extractBPChrArm <- function(segData, centromeres, chrlen){
   # For every named sample in segData, emits two values per chromosome, in
   # this order: the number of breakpoints above the centromere midpoint, then
   # the number below it. Breakpoints are the segment ends excluding the last
   # segment's end; a breakpoint exactly at the midpoint is counted in neither
   # value. Chromosomes with a single segment contribute 0 and 0.
   #
   # Returns a data frame with columns ID (sample name) and value, or NULL when
   # no sample has any segments. Samples without segments are skipped.
   #
   # centromeres is indexed by position: column 2 is matched against the
   # chromosome label, columns 3 and 4 are averaged to get the midpoint
   # (presumably centromere start and end -- confirm against the dataset).
   # The chrlen argument is accepted but not used here.
   sampleIDs <- names(segData)
   perSample <- vector("list", length(sampleIDs))

   for (s in seq_along(sampleIDs)){
      segments <- segData[[sampleIDs[s]]]

      bp_counts <- c()
      for (chrom in unique(segments$chromosome)){
         currChrom <- segments[segments$chromosome == chrom,]
         if (nrow(currChrom) > 1){
            # Grab all segment ends except the last; read by name so this
            # agrees with the other feature functions
            ends <- currChrom$end
            ends <- ends[-length(ends)]

            centRows <- centromeres[,2] == chrom
            centMid <- round((centromeres[centRows,3] + centromeres[centRows,4])/2)

            bp_counts <- c(bp_counts, sum(ends > centMid), sum(ends < centMid))
         } else {
            bp_counts <- c(bp_counts, 0, 0)
         }
      }

      # length() rather than nrow(): bp_counts is NULL for a sample with no
      # segments, and nrow(NULL) is NULL, which makes if() fail
      if (length(bp_counts) > 0){
         perSample[[s]] <- data.frame(ID = sampleIDs[s], value = bp_counts,
                                      stringsAsFactors = FALSE)
      }
   }

   # Skipped samples stay NULL in the list and are ignored by rbind
   output <- do.call(rbind, perSample)
   rownames(output) <- NULL
   return(output)
}

#' @section extractChangepoints:
#' This function returns a vector of average size of changepoints per chromosome
#' @rdname featureFuncs
extractChangepoints <- function(segData, centromeres, chrlen){
   # For every named sample in segData, computes one value per chromosome that
   # has at least two segments: the mean absolute difference in segVal between
   # consecutive segments. A sample with no such chromosome gets a single row
   # with value 0. Returns a data frame with columns ID and value.
   # The centromeres and chrlen arguments are accepted but not used here.
   perSample <- lapply(names(segData), function(sampleID){
      segs <- segData[[sampleID]]

      meanJumps <- NULL
      for (chrom in unique(segs$chromosome)){
         vals <- segs[segs$chromosome == chrom, "segVal"]

         # A lone segment has no changepoint to measure
         if (length(vals) < 2)
            next

         meanJumps <- c(meanJumps, mean(abs(diff(vals))))
      }

      # Keep the sample represented even when it has no changepoints
      if (length(meanJumps) == 0)
         meanJumps <- 0

      data.frame(ID = sampleID, value = meanJumps, stringsAsFactors = FALSE)
   })

   stacked <- do.call(rbind, perSample)
   rownames(stacked) <- NULL
   stacked
}

#' @section extractCN:
#' This function returns a vector of average copynumber per chromosome
#' @rdname featureFuncs
extractCN <- function(segData){
   # For every named sample in segData, computes the mean segVal of the
   # segments on each chromosome (chromosomes in order of first appearance).
   # Returns a data frame with columns ID (sample name) and value.
   sampleIDs <- names(segData)
   perSample <- vector("list", length(sampleIDs))

   for (s in seq_along(sampleIDs)){
      id <- sampleIDs[s]
      segs <- segData[[id]]

      chromMeans <- NULL
      for (chrom in unique(segs$chromosome)){
         onChrom <- segs$chromosome == chrom
         # Avg copynumber over this chromosome's segments
         chromMeans <- c(chromMeans, mean(segs[onChrom, "segVal"]))
      }

      perSample[[s]] <- data.frame(ID = id, value = chromMeans,
                                   stringsAsFactors = FALSE)
   }

   stacked <- do.call(rbind, perSample)
   rownames(stacked) <- NULL
   stacked
}
