/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.instance;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/**
 * Filter that produces a random subsample of a dataset using reservoir
 * sampling (Algorithm "R" by Vitter).
 *
 * <p>Instances of the first batch are streamed through
 * {@link #processInstance(Instance)}, which maintains a fixed-size reservoir.
 * When the first batch is finished the reservoir contents are emitted as the
 * filter output. Instances arriving after the first batch has been completed
 * are passed straight to the output queue.
 *
 * <p>Options: {@code -S <num>} random seed (default 1), {@code -Z <num>}
 * reservoir size, i.e. number of output instances (default 100).
 */
public class ReservoirSample
extends Filter
implements UnsupervisedFilter,
OptionHandler {
    static final long serialVersionUID = 3119607037607101160L;

    /** Requested size of the reservoir (number of instances to keep). */
    protected int m_SampleSize = 100;

    /** The reservoir; allocated in {@link #setInputFormat(Instances)}, released once emitted. */
    protected Instance[] m_subSample;

    /** Number of first-batch instances seen so far (0-based index of the next instance). */
    protected int m_currentInst;

    /** Seed used to initialise {@link #m_random}. */
    protected int m_RandomSeed = 1;

    /** Random number generator; re-created from the seed in {@link #setInputFormat(Instances)}. */
    protected Random m_random;

    /**
     * Returns a short description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Produces a random subsample of a dataset using the reservoir sampling Algorithm \"R\" by Vitter. The original data set does not have to fit into main memory, but the reservoir does. ";
    }

    /**
     * Lists the command-line options understood by this filter.
     *
     * @return an enumeration of {@link Option} objects for {@code -S} and {@code -Z}
     */
    public Enumeration listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.addElement(new Option("\tSpecify the random number seed (default 1)", "S", 1, "-S <num>"));
        options.addElement(new Option("\tThe size of the output dataset - number of instances\n\t(default 100)", "Z", 1, "-Z <num>"));
        return options.elements();
    }

    /**
     * Parses the {@code -S} (seed) and {@code -Z} (sample size) options.
     * An option that is absent resets the corresponding property to its
     * default (seed 1, sample size 100).
     *
     * @param stringArray the option list
     * @throws Exception if an option cannot be extracted; a non-numeric value
     *                   results in a {@link NumberFormatException}
     */
    public void setOptions(String[] stringArray) throws Exception {
        String seedText = Utils.getOption('S', stringArray);
        this.setRandomSeed(seedText.length() != 0 ? Integer.parseInt(seedText) : 1);

        String sizeText = Utils.getOption('Z', stringArray);
        this.setSampleSize(sizeText.length() != 0 ? Integer.parseInt(sizeText) : 100);
    }

    /**
     * Returns the current settings in option form.
     *
     * @return {@code {"-S", <seed>, "-Z", <sample size>}}
     */
    public String[] getOptions() {
        return new String[] {
            "-S", "" + this.getRandomSeed(),
            "-Z", "" + this.getSampleSize()
        };
    }

    /**
     * Returns the tip text for the random seed property.
     *
     * @return the tip text
     */
    public String randomSeedTipText() {
        return "The seed used for random sampling.";
    }

    /**
     * Returns the random number seed.
     *
     * @return the seed
     */
    public int getRandomSeed() {
        return this.m_RandomSeed;
    }

    /**
     * Sets the random number seed. Takes effect the next time
     * {@link #setInputFormat(Instances)} is called.
     *
     * @param n the new seed
     */
    public void setRandomSeed(int n) {
        this.m_RandomSeed = n;
    }

    /**
     * Returns the tip text for the sample size property.
     *
     * @return the tip text
     */
    public String sampleSizeTipText() {
        return "Size of the subsample (reservoir). i.e. the number of instances.";
    }

    /**
     * Returns the requested reservoir size.
     *
     * @return the number of instances to keep
     */
    public int getSampleSize() {
        return this.m_SampleSize;
    }

    /**
     * Sets the requested reservoir size. The reservoir itself is only
     * (re)allocated by {@link #setInputFormat(Instances)}, so a change made
     * while a batch is being processed does not affect that batch.
     *
     * @param n the number of instances to keep
     */
    public void setSampleSize(int n) {
        this.m_SampleSize = n;
    }

    /**
     * Returns the capabilities of this filter: starting from the superclass
     * capabilities, all attribute and class types are enabled, as are missing
     * values, missing class values and datasets without a class.
     *
     * @return the capabilities
     */
    public Capabilities getCapabilities() {
        Capabilities capabilities = super.getCapabilities();
        capabilities.enableAllAttributes();
        capabilities.enable(Capabilities.Capability.MISSING_VALUES);
        capabilities.enableAllClasses();
        capabilities.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        capabilities.enable(Capabilities.Capability.NO_CLASS);
        return capabilities;
    }

    /**
     * Sets the input format and resets the sampling state: a fresh reservoir
     * of {@link #getSampleSize()} slots, an instance counter of zero and a
     * random generator seeded with {@link #getRandomSeed()}. The output format
     * is the same as the input format.
     *
     * @param instances the structure of the incoming data
     * @return always {@code true}
     * @throws IllegalArgumentException if the configured sample size is negative
     * @throws Exception if the superclass rejects the input format
     */
    public boolean setInputFormat(Instances instances) throws Exception {
        // Fail early with a clear message instead of a NegativeArraySizeException
        // raised after the filter state has already been partially reset.
        if (this.m_SampleSize < 0) {
            throw new IllegalArgumentException("Sample size must not be negative: " + this.m_SampleSize);
        }
        super.setInputFormat(instances);
        this.setOutputFormat(instances);
        this.m_subSample = new Instance[this.m_SampleSize];
        this.m_currentInst = 0;
        this.m_random = new Random(this.m_RandomSeed);
        return true;
    }

    /**
     * Offers one first-batch instance to the reservoir (Algorithm R).
     *
     * <p>While the reservoir is not yet full the instance is stored directly.
     * Afterwards the instance with 0-based index {@code i} is kept with
     * probability {@code k / (i + 1)}, where {@code k} is the reservoir
     * length, and if kept overwrites a uniformly chosen slot.
     *
     * @param instance the instance to consider; a copy is stored if it is kept
     */
    protected void processInstance(Instance instance) {
        // Use the real array length rather than m_SampleSize: the property may
        // have been changed after the reservoir was allocated.
        int capacity = this.m_subSample.length;
        if (this.m_currentInst < capacity) {
            this.m_subSample[this.m_currentInst] = (Instance)instance.copy();
        } else {
            // m_currentInst is the 0-based index of this instance, so it is the
            // (m_currentInst + 1)-th one seen; dividing by m_currentInst alone
            // would accept the first post-fill instance with probability 1.
            double keepProbability = (double)capacity / ((double)this.m_currentInst + 1.0);
            if (this.m_random.nextDouble() < keepProbability) {
                int slot = (int)((double)capacity * this.m_random.nextDouble());
                this.m_subSample[slot] = (Instance)instance.copy();
            }
        }
        ++this.m_currentInst;
    }

    /**
     * Inputs an instance for filtering.
     *
     * <p>Once the first batch has been completed, instances are pushed to the
     * output unchanged. During the first batch they are only offered to the
     * reservoir and nothing becomes available for collection.
     *
     * @param instance the input instance
     * @return {@code true} if the instance was pushed to the output,
     *         {@code false} while the first batch is still being sampled
     * @throws IllegalStateException if no input format has been defined
     */
    public boolean input(Instance instance) {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            this.resetQueue();
            this.m_NewBatch = false;
        }
        if (!this.isFirstBatchDone()) {
            this.copyValues(instance, false);
            this.processInstance(instance);
            return false;
        }
        this.push(instance);
        return true;
    }

    /**
     * Signals the end of the current batch. At the end of the first batch the
     * reservoir contents are pushed to the output.
     *
     * @return {@code true} if there are instances pending output
     * @throws IllegalStateException if no input format has been defined
     */
    public boolean batchFinished() {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (!this.isFirstBatchDone()) {
            this.createSubsample();
        }
        this.flushInput();
        this.m_NewBatch = true;
        this.m_FirstBatchDone = true;
        return this.numPendingOutput() != 0;
    }

    /**
     * Pushes a copy of every filled reservoir slot to the output, in slot
     * order, and then releases the reservoir. Slots are filled front to back,
     * so the first {@code null} slot marks the end of the sample (fewer
     * instances were seen than the reservoir can hold).
     */
    protected void createSubsample() {
        if (this.m_subSample == null) {
            // Reservoir already emitted or never allocated; nothing to output.
            return;
        }
        for (Instance kept : this.m_subSample) {
            if (kept == null) {
                break;
            }
            this.push((Instance)kept.copy());
        }
        this.m_subSample = null;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision extracted from the embedded revision keyword
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 1.4 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param stringArray the command-line arguments
     */
    public static void main(String[] stringArray) {
        ReservoirSample.runFilter(new ReservoirSample(), stringArray);
    }
}

