package cc.mallet.classify.tui;

import cc.mallet.pipe.Noop;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSelection;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.InfoGain;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.Random;
import java.util.logging.Logger;
import org.eclipse.jdt.internal.corext.refactoring.JDTRefactoringDescriptor;

/* loaded from: input_file:cc/mallet/classify/tui/Vectors2Vectors.class */
/**
 * Command-line tool for manipulating a serialized instance list of feature vectors.
 *
 * <p>Depending on the options given, the tool does exactly one of the following:
 * <ul>
 *   <li>prunes features by occurrence count and/or information gain (optionally followed by a
 *       FeatureVector-to-FeatureSequence conversion) and writes the result;</li>
 *   <li>converts FeatureVectors to FeatureSequences;</li>
 *   <li>splits the list into training / testing / validation lists;</li>
 *   <li>hides the targets (stashing each one in the instance property {@code "target"});</li>
 *   <li>reveals previously hidden targets.</li>
 * </ul>
 * The modes are checked in that order; the first one that applies wins.
 */
public class Vectors2Vectors {
    private static final Logger logger = MalletLogger.getLogger(Vectors2Vectors.class.getName());

    // NOTE(review): JDTRefactoringDescriptor.ATTRIBUTE_INPUT looks like a decompiler artifact that
    // presumably stands for the plain option name "input" -- confirm its value and, if so, replace
    // it with the literal so this class no longer depends on Eclipse JDT internals.
    static CommandOption.File inputFile = new CommandOption.File(Vectors2Vectors.class, JDTRefactoringDescriptor.ATTRIBUTE_INPUT, "FILE", true, new File("-"), "Read the instance list from this file; Using - indicates stdin.", null);
    // Help text previously said "stdin" for this output file; the sibling output options say stdout.
    static CommandOption.File outputFile = new CommandOption.File(Vectors2Vectors.class, "output", "FILE", true, new File("-"), "Write pruned instance list to this file (use --training-file etc. if you are splitting the list). Using - indicates stdout.", null);
    static CommandOption.File trainingFile = new CommandOption.File(Vectors2Vectors.class, "training-file", "FILE", true, new File("training.vectors"), "Write the training set instance list to this file (or use --output if you are only pruning features); Using - indicates stdout.", null);
    static CommandOption.File testFile = new CommandOption.File(Vectors2Vectors.class, "testing-file", "FILE", true, new File("test.vectors"), "Write the test set instance list to this file; Using - indicates stdout.", null);
    static CommandOption.File validationFile = new CommandOption.File(Vectors2Vectors.class, "validation-file", "FILE", true, new File("validation.vectors"), "Write the validation set instance list to this file; Using - indicates stdout.", null);
    static CommandOption.Double trainingProportion = new CommandOption.Double(Vectors2Vectors.class, "training-portion", "DECIMAL", true, 1.0d, "The fraction of the instances that should be used for training.", null);
    static CommandOption.Double validationProportion = new CommandOption.Double(Vectors2Vectors.class, "validation-portion", "DECIMAL", true, 0.0d, "The fraction of the instances that should be used for validation.", null);
    static CommandOption.Integer randomSeed = new CommandOption.Integer(Vectors2Vectors.class, "random-seed", "INTEGER", true, 0, "The random seed for randomly selecting a proportion of the instance list for training", null);
    static CommandOption.Integer pruneInfogain = new CommandOption.Integer(Vectors2Vectors.class, "prune-infogain", "N", false, 0, "Reduce features to the top N by information gain.", null);
    static CommandOption.Integer pruneCount = new CommandOption.Integer(Vectors2Vectors.class, "prune-count", "N", false, 0, "Reduce features to those that occur more than N times.", null);
    static CommandOption.Boolean vectorToSequence = new CommandOption.Boolean(Vectors2Vectors.class, "vector-to-sequence", "[TRUE|FALSE]", false, false, "Convert FeatureVector's to FeatureSequence's.", null);
    static CommandOption.Boolean hideTargets = new CommandOption.Boolean(Vectors2Vectors.class, "hide-targets", "[TRUE|FALSE]", false, false, "Hide targets.", null);
    static CommandOption.Boolean revealTargets = new CommandOption.Boolean(Vectors2Vectors.class, "reveal-targets", "[TRUE|FALSE]", false, false, "Reveal targets.", null);

    /**
     * Entry point: parses the command-line options, loads the input instance list and runs the
     * single processing mode selected by the options.
     *
     * @param strArr the raw command-line arguments; when empty, usage is printed and the JVM
     *               exits with status -1
     * @throws FileNotFoundException declared for the instance-list load/save calls
     * @throws IOException declared for the instance-list load/save calls
     * @throws UnsupportedOperationException if pruning is combined with a training portion other
     *         than 1.0, or if count pruning meets an unsupported data type
     * @throws IllegalArgumentException if pruning was requested but neither --output nor
     *         --training-file was given
     */
    public static void main(String[] strArr) throws FileNotFoundException, IOException {
        CommandOption.setSummary(Vectors2Vectors.class, "A tool for manipulating instance lists of feature vectors.");
        CommandOption.process(Vectors2Vectors.class, strArr);
        if (strArr.length == 0) {
            CommandOption.getList(Vectors2Vectors.class).printUsage(false);
            System.exit(-1);
        }

        Random random = randomSeed.wasInvoked() ? new Random(randomSeed.value) : new Random();
        double trainingPortion = trainingProportion.value;
        double validationPortion = validationProportion.value;
        logger.info("Training portion = " + trainingPortion);
        logger.info("Validation portion = " + validationPortion);
        logger.info("Testing portion = " + ((1.0d - validationPortion) - trainingPortion));
        logger.info("Prune info gain = " + pruneInfogain.value);
        logger.info("Prune count = " + pruneCount.value);

        InstanceList instances = InstanceList.load(inputFile.value);
        boolean pruning = pruneInfogain.wasInvoked() || pruneCount.wasInvoked();

        // Nothing was requested that would alter the list: pass it through unchanged.
        if (trainingPortion == 1.0d && !vectorToSequence.value && !pruning
                && !hideTargets.wasInvoked() && !revealTargets.wasInvoked()) {
            logger.warning("Vectors2Vectors was invoked, but did not change anything");
            instances.save(trainingFile.value());
            System.exit(0);
        }

        if (pruning) {
            pruneAndWrite(instances, trainingPortion, random);
        } else if (vectorToSequence.value) {
            writeIfOutputRequested(toFeatureSequences(instances, random));
        } else if (trainingProportion.wasInvoked() || validationProportion.wasInvoked()) {
            splitAndWrite(instances, random, trainingPortion, validationPortion);
        } else if (hideTargets.wasInvoked()) {
            hideAllTargets(instances);
            writeIfOutputRequested(instances);
        } else if (revealTargets.wasInvoked()) {
            revealAllTargets(instances);
            writeIfOutputRequested(instances);
        }
    }

    /**
     * Applies count pruning, information-gain pruning and the optional vector-to-sequence
     * conversion (in that order), then writes the result to --output or, failing that, to
     * --training-file.
     */
    private static void pruneAndWrite(InstanceList instances, double trainingPortion, Random random) throws FileNotFoundException, IOException {
        if (trainingPortion != 1.0d) {
            throw new UnsupportedOperationException("Infogain/count processing of test or validation lists not yet supported.");
        }
        InstanceList result = instances;
        if (pruneCount.value > 0) {
            result = pruneByCount(result);
        }
        if (pruneInfogain.value > 0) {
            result = pruneByInfoGain(result);
        }
        if (vectorToSequence.value) {
            result = toFeatureSequences(result, random);
        }
        if (outputFile.wasInvoked()) {
            writeInstanceList(result, outputFile.value());
        } else {
            if (!trainingFile.wasInvoked()) {
                throw new IllegalArgumentException("You must specify a file to write to, using --output [filename]");
            }
            writeInstanceList(result, trainingFile.value());
        }
    }

    /**
     * Builds a new list keeping only features whose total count exceeds {@code pruneCount.value}.
     * The data type of the first instance decides which strategy is used, so the list is assumed
     * to be non-empty and homogeneous.
     */
    private static InstanceList pruneByCount(InstanceList instances) {
        Object firstData = instances.get(0).getData();
        if (firstData instanceof FeatureSequence) {
            return pruneSequencesByCount(instances);
        }
        if (firstData instanceof FeatureVector) {
            return pruneVectorsByCount(instances);
        }
        // Report the class of the data (the thing that is unsupported), not of the Instance wrapper.
        String dataType = firstData == null ? "null" : firstData.getClass().getName();
        throw new UnsupportedOperationException("Pruning features from " + dataType + " is not currently supported");
    }

    /**
     * Count-prunes a list of FeatureSequences. The source list is drained one instance at a time
     * as the pruned list is built, so it is empty when this method returns.
     */
    private static InstanceList pruneSequencesByCount(InstanceList instances) {
        Alphabet oldAlphabet = instances.getDataAlphabet();
        Alphabet newAlphabet = new Alphabet();
        Noop pipe = new Noop(newAlphabet, instances.getTargetAlphabet());
        InstanceList pruned = new InstanceList(pipe);

        double[] counts = new double[oldAlphabet.size()];
        for (int i = 0; i < instances.size(); i++) {
            ((FeatureSequence) instances.get(i).getData()).addFeatureWeightsTo(counts);
        }

        while (instances.size() > 0) {
            Instance instance = instances.get(0);
            FeatureSequence sequence = (FeatureSequence) instance.getData();
            sequence.prune(counts, newAlphabet, pruneCount.value);
            // NOTE(review): unlike the other conversions in this class, the instance weight is not
            // carried over here -- confirm whether that is intentional.
            pruned.add(pipe.instanceFrom(new Instance(sequence, instance.getTarget(), instance.getName(), instance.getSource())));
            instances.remove(0);
        }
        logger.info("features: " + oldAlphabet.size() + " -> " + newAlphabet.size());
        return pruned;
    }

    /**
     * Count-prunes a list of FeatureVectors, re-indexing the surviving features in a fresh
     * alphabet. Each source instance has its data cleared once it has been converted.
     */
    private static InstanceList pruneVectorsByCount(InstanceList instances) {
        Alphabet newAlphabet = new Alphabet();
        InstanceList pruned = new InstanceList(new Noop(newAlphabet, instances.getTargetAlphabet()));

        int numFeatures = instances.getDataAlphabet().size();
        double[] counts = new double[numFeatures];
        for (int i = 0; i < instances.size(); i++) {
            ((FeatureVector) instances.get(i).getData()).addTo(counts);
        }

        BitSet kept = new BitSet(numFeatures);
        for (int feature = 0; feature < numFeatures; feature++) {
            if (counts[feature] > pruneCount.value) {
                kept.set(feature);
            }
        }
        logger.info("Pruning " + (numFeatures - kept.cardinality()) + " features out of " + numFeatures + "; leaving " + kept.cardinality() + " features.");

        FeatureSelection selection = new FeatureSelection(instances.getDataAlphabet(), kept);
        for (int i = 0; i < instances.size(); i++) {
            Instance instance = instances.get(i);
            FeatureVector prunedVector = FeatureVector.newFeatureVector((FeatureVector) instance.getData(), newAlphabet, selection);
            pruned.add(new Instance(prunedVector, instance.getTarget(), instance.getName(), instance.getSource()), instances.getInstanceWeight(i));
            instance.unLock();
            instance.setData(null);
        }
        return pruned;
    }

    /**
     * Builds a new list whose feature vectors are restricted to the feature selection constructed
     * from {@code InfoGain} and {@code pruneInfogain.value}. Each source instance has its data
     * cleared once it has been converted.
     */
    private static InstanceList pruneByInfoGain(InstanceList instances) {
        Alphabet newAlphabet = new Alphabet();
        Noop pipe = new Noop(newAlphabet, instances.getTargetAlphabet());
        InstanceList pruned = new InstanceList(pipe);
        FeatureSelection selection = new FeatureSelection(new InfoGain(instances), pruneInfogain.value);
        for (int i = 0; i < instances.size(); i++) {
            Instance instance = instances.get(i);
            FeatureVector prunedVector = FeatureVector.newFeatureVector((FeatureVector) instance.getData(), newAlphabet, selection);
            instance.unLock();
            instance.setData(null);
            pruned.add(pipe.instanceFrom(new Instance(prunedVector, instance.getTarget(), instance.getName(), instance.getSource())), instances.getInstanceWeight(i));
        }
        return pruned;
    }

    /**
     * Converts every FeatureVector into a FeatureSequence: each feature index is repeated once per
     * unit of its value and the resulting occurrences are shuffled.
     *
     * @param vectors list whose instances all hold FeatureVector data; each instance's data is
     *                cleared after conversion
     * @param random  source of randomness for the shuffle, so that --random-seed makes the
     *                conversion reproducible
     * @return a new list over the same alphabets, preserving instance weights
     */
    private static InstanceList toFeatureSequences(InstanceList vectors, Random random) {
        Alphabet dataAlphabet = vectors.getDataAlphabet();
        Noop pipe = new Noop(dataAlphabet, vectors.getTargetAlphabet());
        InstanceList sequences = new InstanceList(pipe);
        for (int i = 0; i < vectors.size(); i++) {
            Instance instance = vectors.get(i);
            FeatureVector featureVector = (FeatureVector) instance.getData();

            ArrayList<Integer> occurrences = new ArrayList<Integer>();
            for (int location = 0; location < featureVector.numLocations(); location++) {
                for (int copy = 0; copy < featureVector.valueAtLocation(location); copy++) {
                    occurrences.add(Integer.valueOf(featureVector.indexAtLocation(location)));
                }
            }
            Collections.shuffle(occurrences, random);

            int[] features = new int[occurrences.size()];
            for (int k = 0; k < features.length; k++) {
                features[k] = occurrences.get(k).intValue();
            }
            FeatureSequence sequence = new FeatureSequence(dataAlphabet, features);

            instance.unLock();
            instance.setData(null);
            sequences.add(pipe.instanceFrom(new Instance(sequence, instance.getTarget(), instance.getName(), instance.getSource())), vectors.getInstanceWeight(i));
        }
        return sequences;
    }

    /**
     * Splits the list into training / testing / validation parts and writes each non-empty part
     * to its configured file.
     */
    private static void splitAndWrite(InstanceList instances, Random random, double trainingPortion, double validationPortion) throws FileNotFoundException, IOException {
        double[] proportions = {trainingPortion, (1.0d - trainingPortion) - validationPortion, validationPortion};
        InstanceList[] parts = instances.split(random, proportions);
        if (parts[0].size() > 0) {
            writeInstanceList(parts[0], trainingFile.value());
        }
        if (parts[1].size() > 0) {
            writeInstanceList(parts[1], testFile.value());
        }
        if (parts[2].size() > 0) {
            writeInstanceList(parts[2], validationFile.value());
        }
    }

    /** Moves every instance's target into its {@code "target"} property and nulls the target. */
    private static void hideAllTargets(InstanceList instances) {
        for (Instance instance : instances) {
            instance.unLock();
            instance.setProperty("target", instance.getTarget());
            instance.setTarget(null);
            instance.lock();
        }
    }

    /** Restores every instance's target from its {@code "target"} property. */
    private static void revealAllTargets(InstanceList instances) {
        for (Instance instance : instances) {
            instance.unLock();
            instance.setTarget(instance.getProperty("target"));
            instance.lock();
        }
    }

    /** Writes the list to --output, but only when that option was given on the command line. */
    private static void writeIfOutputRequested(InstanceList instances) throws FileNotFoundException, IOException {
        if (outputFile.wasInvoked()) {
            writeInstanceList(instances, outputFile.value());
        }
    }

    /** Logs the destination and saves the instance list to it. */
    private static void writeInstanceList(InstanceList instanceList, File file) throws FileNotFoundException, IOException {
        logger.info("Writing instance list to " + file);
        instanceList.save(file);
    }
}
