package cc.mallet.topics.tui;

import cc.mallet.topics.PAM4L;
import cc.mallet.topics.ParallelTopicModel;
import cc.mallet.topics.PolylingualTopicModel;
import cc.mallet.topics.TopicalNGrams;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.Randoms;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import org.eclipse.jdt.internal.corext.refactoring.JDTRefactoringDescriptor;

/* loaded from: input_file:cc/mallet/topics/tui/Vectors2Topics.class */
/**
 * Command-line entry point for estimating topic models and writing their diagnostics.
 *
 * <p>Depending on the options given, {@link #main(String[])} trains one of four models:
 * <ul>
 *   <li>{@code --use-pam}: a {@link PAM4L} model;</li>
 *   <li>{@code --use-ngrams}: a {@link TopicalNGrams} model;</li>
 *   <li>{@code --language-inputs}: a {@link PolylingualTopicModel};</li>
 *   <li>otherwise: a {@link ParallelTopicModel}, optionally restored from {@code --input-model}.</li>
 * </ul>
 * The branches are checked in that order and exactly one of them runs.
 *
 * <p>All options are declared as static {@link CommandOption} fields; each is constructed with
 * this class as its owner, and {@link CommandOption#process} is called with this class in
 * {@code main} to fill in their values from the command line.
 */
public class Vectors2Topics {
    // The option name is the literal "input" (the --input-model help text below refers to "--input").
    static CommandOption.String inputFile = new CommandOption.String(Vectors2Topics.class, "input", "FILENAME", true, null,
            "The filename from which to read the list of training instances.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    static CommandOption.SpacedStrings languageInputFiles = new CommandOption.SpacedStrings(Vectors2Topics.class, "language-inputs", "FILENAME [FILENAME ...]", true, null,
            "Filenames for polylingual topic model. Each language should have its own file, with the same number of instances in each file. If a document is missing in one language, there should be an empty instance.", null);
    // Declared as an option but not read anywhere in this class.
    static CommandOption.String testingFile = new CommandOption.String(Vectors2Topics.class, "testing", "FILENAME", false, null,
            "The filename from which to read the list of instances for empirical likelihood calculation.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    static CommandOption.String outputModelFilename = new CommandOption.String(Vectors2Topics.class, "output-model", "FILENAME", true, null,
            "The filename in which to write the binary topic model at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String inputModelFilename = new CommandOption.String(Vectors2Topics.class, "input-model", "FILENAME", true, null,
            "The filename from which to read the binary topic model to which the --input will be appended, allowing incremental training.  By default this is null, indicating that no file will be read.", null);
    static CommandOption.String inferencerFilename = new CommandOption.String(Vectors2Topics.class, "inferencer-filename", "FILENAME", true, null,
            "A topic inferencer applies a previously trained topic model to new documents.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String evaluatorFilename = new CommandOption.String(Vectors2Topics.class, "evaluator-filename", "FILENAME", true, null,
            "A held-out likelihood evaluator for new documents.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String stateFile = new CommandOption.String(Vectors2Topics.class, "output-state", "FILENAME", true, null,
            "The filename in which to write the Gibbs sampling state after at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicKeysFile = new CommandOption.String(Vectors2Topics.class, "output-topic-keys", "FILENAME", true, null,
            "The filename in which to write the top words for each topic and any Dirichlet parameters.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicWordWeightsFile = new CommandOption.String(Vectors2Topics.class, "topic-word-weights-file", "FILENAME", true, null,
            "The filename in which to write unnormalized weights for every topic and word type.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String wordTopicCountsFile = new CommandOption.String(Vectors2Topics.class, "word-topic-counts-file", "FILENAME", true, null,
            "The filename in which to write a sparse representation of topic-word assignments.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicReportXMLFile = new CommandOption.String(Vectors2Topics.class, "xml-topic-report", "FILENAME", true, null,
            "The filename in which to write the top words for each topic and any Dirichlet parameters in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicPhraseReportXMLFile = new CommandOption.String(Vectors2Topics.class, "xml-topic-phrase-report", "FILENAME", true, null,
            "The filename in which to write the top words and phrases for each topic and any Dirichlet parameters in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String docTopicsFile = new CommandOption.String(Vectors2Topics.class, "output-doc-topics", "FILENAME", true, null,
            "The filename in which to write the topic proportions per document, at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Double docTopicsThreshold = new CommandOption.Double(Vectors2Topics.class, "doc-topics-threshold", "DECIMAL", true, 0.0d,
            "When writing topic proportions per document with --output-doc-topics, do not print topics with proportions less than this threshold value.", null);
    static CommandOption.Integer docTopicsMax = new CommandOption.Integer(Vectors2Topics.class, "doc-topics-max", "INTEGER", true, -1,
            "When writing topic proportions per document with --output-doc-topics, do not print more than INTEGER number of topics.  A negative value indicates that all topics should be printed.", null);
    static CommandOption.Integer numTopics = new CommandOption.Integer(Vectors2Topics.class, "num-topics", "INTEGER", true, 10,
            "The number of topics to fit.", null);
    static CommandOption.Integer numThreads = new CommandOption.Integer(Vectors2Topics.class, "num-threads", "INTEGER", true, 1,
            "The number of threads for parallel training.", null);
    static CommandOption.Integer numIterations = new CommandOption.Integer(Vectors2Topics.class, "num-iterations", "INTEGER", true, 1000,
            "The number of iterations of Gibbs sampling.", null);
    static CommandOption.Integer randomSeed = new CommandOption.Integer(Vectors2Topics.class, "random-seed", "INTEGER", true, 0,
            "The random seed for the Gibbs sampler.  Default is 0, which will use the clock.", null);
    static CommandOption.Integer topWords = new CommandOption.Integer(Vectors2Topics.class, "num-top-words", "INTEGER", true, 20,
            "The number of most probable words to print for each topic after model estimation.", null);
    static CommandOption.Integer showTopicsInterval = new CommandOption.Integer(Vectors2Topics.class, "show-topics-interval", "INTEGER", true, 50,
            "The number of iterations between printing a brief summary of the topics so far.", null);
    static CommandOption.Integer outputModelInterval = new CommandOption.Integer(Vectors2Topics.class, "output-model-interval", "INTEGER", true, 0,
            "The number of iterations between writing the model (and its Gibbs sampling state) to a binary file.  You must also set the --output-model to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.Integer outputStateInterval = new CommandOption.Integer(Vectors2Topics.class, "output-state-interval", "INTEGER", true, 0,
            "The number of iterations between writing the sampling state to a text file.  You must also set the --output-state to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.Integer optimizeInterval = new CommandOption.Integer(Vectors2Topics.class, "optimize-interval", "INTEGER", true, 0,
            "The number of iterations between reestimating dirichlet hyperparameters.", null);
    static CommandOption.Integer optimizeBurnIn = new CommandOption.Integer(Vectors2Topics.class, "optimize-burn-in", "INTEGER", true, 200,
            "The number of iterations to run before first estimating dirichlet hyperparameters.", null);
    static CommandOption.Boolean useSymmetricAlpha = new CommandOption.Boolean(Vectors2Topics.class, "use-symmetric-alpha", "true|false", false, false,
            "Only optimize the concentration parameter of the prior over document-topic distributions. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
    static CommandOption.Boolean useNgrams = new CommandOption.Boolean(Vectors2Topics.class, "use-ngrams", "true|false", false, false,
            "Rather than using LDA, use Topical-N-Grams, which models phrases.", null);
    static CommandOption.Boolean usePAM = new CommandOption.Boolean(Vectors2Topics.class, "use-pam", "true|false", false, false,
            "Rather than using LDA, use Pachinko Allocation Model, which models topical correlations.You cannot do this and also --use-ngrams.", null);
    static CommandOption.Double alpha = new CommandOption.Double(Vectors2Topics.class, "alpha", "DECIMAL", true, 50.0d,
            "Alpha parameter: smoothing over topic distribution.", null);
    static CommandOption.Double beta = new CommandOption.Double(Vectors2Topics.class, "beta", "DECIMAL", true, 0.01d,
            "Beta parameter: smoothing over unigram distribution.", null);
    static CommandOption.Double gamma = new CommandOption.Double(Vectors2Topics.class, "gamma", "DECIMAL", true, 0.01d,
            "Gamma parameter: smoothing over bigram distribution", null);
    static CommandOption.Double delta = new CommandOption.Double(Vectors2Topics.class, "delta", "DECIMAL", true, 0.03d,
            "Delta parameter: smoothing over choice of unigram/bigram", null);
    static CommandOption.Double delta1 = new CommandOption.Double(Vectors2Topics.class, "delta1", "DECIMAL", true, 0.2d,
            "Topic N-gram smoothing parameter", null);
    static CommandOption.Double delta2 = new CommandOption.Double(Vectors2Topics.class, "delta2", "DECIMAL", true, 1000.0d,
            "Topic N-gram smoothing parameter", null);
    static CommandOption.Integer pamNumSupertopics = new CommandOption.Integer(Vectors2Topics.class, "pam-num-supertopics", "INTEGER", true, 10,
            "When using the Pachinko Allocation Model (PAM) set the number of supertopics.  Typically this is about half the number of subtopics, although more may help.", null);
    static CommandOption.Integer pamNumSubtopics = new CommandOption.Integer(Vectors2Topics.class, "pam-num-subtopics", "INTEGER", true, 20,
            "When using the Pachinko Allocation Model (PAM) set the number of subtopics.", null);

    /**
     * Parses the command line and trains the model selected by the options.
     *
     * @param strArr the raw command-line arguments
     * @throws IOException if reading the input or writing one of the report files fails
     * @throws IllegalArgumentException if an unsupported option combination is given, a required
     *         input is missing, or the serialized model cannot be written
     */
    public static void main(String[] strArr) throws IOException {
        CommandOption.setSummary(Vectors2Topics.class, "A tool for estimating, saving and printing diagnostics for topic models, such as LDA.");
        CommandOption.process(Vectors2Topics.class, strArr);
        if (usePAM.value) {
            runPam();
        } else if (useNgrams.value) {
            runTopicalNGrams();
        } else if (languageInputFiles.value != null) {
            runPolylingual();
        } else {
            runLda();
        }
    }

    /** Trains a {@link PAM4L} model on {@code --input} and writes the requested outputs. */
    private static void runPam() throws IOException {
        InstanceList training = loadInstances(inputFile.value);
        System.out.println("Data loaded.");
        if (inputModelFilename.value != null) {
            throw new IllegalArgumentException("--input-model not supported with --use-pam.");
        }
        PAM4L model = new PAM4L(pamNumSupertopics.value, pamNumSubtopics.value);
        // NOTE(review): the third argument is a hard-coded 50; presumably an optimization
        // interval -- confirm against PAM4L.estimate.
        model.estimate(training, numIterations.value, 50, showTopicsInterval.value, outputModelInterval.value,
                outputModelFilename.value, newRandoms());
        model.printTopWords(topWords.value, true);
        if (stateFile.value != null) {
            model.printState(new File(stateFile.value));
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(docTopicsFile.value)))) {
                model.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
            }
        }
        if (outputModelFilename.value != null) {
            saveModel(model, outputModelFilename.value);
        }
    }

    /** Trains a {@link TopicalNGrams} model on {@code --input} and writes the requested outputs. */
    private static void runTopicalNGrams() throws IOException {
        InstanceList training = loadInstances(inputFile.value);
        System.out.println("Data loaded.");
        if (inputModelFilename.value != null) {
            throw new IllegalArgumentException("--input-model not supported with --use-ngrams.");
        }
        TopicalNGrams model = new TopicalNGrams(numTopics.value, alpha.value, beta.value, gamma.value,
                delta.value, delta1.value, delta2.value);
        model.estimate(training, numIterations.value, showTopicsInterval.value, outputModelInterval.value,
                outputModelFilename.value, newRandoms());
        model.printTopWords(topWords.value, true);
        if (stateFile.value != null) {
            model.printState(new File(stateFile.value));
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(docTopicsFile.value)))) {
                model.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
            }
        }
        if (outputModelFilename.value != null) {
            saveModel(model, outputModelFilename.value);
        }
    }

    /**
     * Trains a {@link PolylingualTopicModel} on the files given by {@code --language-inputs}
     * (one instance list per file) and writes the requested outputs.
     */
    private static void runPolylingual() throws IOException {
        String[] filenames = languageInputFiles.value;
        if (filenames.length == 0) {
            // Without this guard the first-list check below fails with ArrayIndexOutOfBoundsException.
            throw new IllegalArgumentException("--language-inputs requires at least one filename.");
        }
        InstanceList[] training = new InstanceList[filenames.length];
        for (int i = 0; i < training.length; i++) {
            training[i] = InstanceList.load(new File(filenames[i]));
            if (training[i] != null) {
                System.out.println(i + " is not null");
            } else {
                System.out.println(i + " is null");
            }
        }
        System.out.println("Data loaded.");
        // Only the first language's instance list is checked, as in the original code.
        requireFeatureSequences(training[0]);

        PolylingualTopicModel model = new PolylingualTopicModel(numTopics.value, alpha.value);
        if (randomSeed.value != 0) {
            model.setRandomSeed(randomSeed.value);
        }
        model.addInstances(training);
        model.setTopicDisplay(showTopicsInterval.value, topWords.value);
        model.setNumIterations(numIterations.value);
        model.setOptimizeInterval(optimizeInterval.value);
        model.setBurninPeriod(optimizeBurnIn.value);
        if (outputStateInterval.value != 0) {
            model.setSaveState(outputStateInterval.value, stateFile.value);
        }
        if (outputModelInterval.value != 0) {
            model.setModelOutput(outputModelInterval.value, outputModelFilename.value);
        }
        model.estimate();

        if (topicKeysFile.value != null) {
            model.printTopWords(new File(topicKeysFile.value), topWords.value, false);
        }
        if (stateFile.value != null) {
            model.printState(new File(stateFile.value));
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(docTopicsFile.value)))) {
                model.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
            }
        }
        if (outputModelFilename.value != null) {
            saveModel(model, outputModelFilename.value);
        }
    }

    /**
     * Trains a {@link ParallelTopicModel}, either restored from {@code --input-model} or newly
     * created from {@code --input}, and writes the requested outputs.
     */
    private static void runLda() throws IOException {
        ParallelTopicModel model;
        if (inputModelFilename.value != null) {
            model = restoreModel(inputModelFilename.value);
        } else {
            InstanceList training = loadInstances(inputFile.value);
            System.out.println("Data loaded.");
            requireFeatureSequences(training);
            model = new ParallelTopicModel(numTopics.value, alpha.value, beta.value);
            if (randomSeed.value != 0) {
                model.setRandomSeed(randomSeed.value);
            }
            model.addInstances(training);
        }

        model.setTopicDisplay(showTopicsInterval.value, topWords.value);
        model.setNumIterations(numIterations.value);
        model.setOptimizeInterval(optimizeInterval.value);
        model.setBurninPeriod(optimizeBurnIn.value);
        model.setSymmetricAlpha(useSymmetricAlpha.value);
        if (outputStateInterval.value != 0) {
            model.setSaveState(outputStateInterval.value, stateFile.value);
        }
        if (outputModelInterval.value != 0) {
            model.setSaveSerializedModel(outputModelInterval.value, outputModelFilename.value);
        }
        model.setNumThreads(numThreads.value);
        model.estimate();

        if (topicKeysFile.value != null) {
            model.printTopWords(new File(topicKeysFile.value), topWords.value, false);
        }
        if (topicReportXMLFile.value != null) {
            try (PrintWriter out = new PrintWriter(topicReportXMLFile.value)) {
                model.topicXMLReport(out, topWords.value);
            }
        }
        if (topicPhraseReportXMLFile.value != null) {
            try (PrintWriter out = new PrintWriter(topicPhraseReportXMLFile.value)) {
                model.topicPhraseXMLReport(out, topWords.value);
            }
        }
        if (stateFile.value != null) {
            model.printState(new File(stateFile.value));
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(docTopicsFile.value)))) {
                model.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
            }
        }
        if (topicWordWeightsFile.value != null) {
            model.printTopicWordWeights(new File(topicWordWeightsFile.value));
        }
        if (wordTopicCountsFile.value != null) {
            model.printTypeTopicCounts(new File(wordTopicCountsFile.value));
        }
        if (outputModelFilename.value != null) {
            saveModel(model, outputModelFilename.value);
        }

        // The inferencer and evaluator are written best-effort: a failure is reported on
        // stderr but does not abort the run, matching the original behavior.
        if (inferencerFilename.value != null) {
            try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(inferencerFilename.value))) {
                out.writeObject(model.getInferencer());
            } catch (Exception e) {
                System.err.println("Couldn't write topic inferencer to filename " + inferencerFilename.value + ": " + e);
            }
        }
        if (evaluatorFilename.value != null) {
            try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(evaluatorFilename.value))) {
                out.writeObject(model.getProbEstimator());
            } catch (Exception e) {
                System.err.println("Couldn't write evaluator to filename " + evaluatorFilename.value + ": " + e);
            }
        }
    }

    /**
     * Loads a serialized instance list.
     *
     * @param filename the file to load; must not be {@code null}
     * @return the loaded instances
     * @throws IllegalArgumentException if {@code filename} is {@code null} (no {@code --input} given)
     */
    private static InstanceList loadInstances(String filename) {
        if (filename == null) {
            // Fail with a readable message instead of the NullPointerException from new File(null).
            throw new IllegalArgumentException("No training data specified: use --input FILENAME.");
        }
        return InstanceList.load(new File(filename));
    }

    /**
     * Restores a previously saved {@link ParallelTopicModel}; on failure prints the error to
     * stderr and exits the JVM with status 1.
     */
    private static ParallelTopicModel restoreModel(String filename) {
        try {
            return ParallelTopicModel.read(new File(filename));
        } catch (Exception e) {
            System.err.println("Unable to restore saved topic model " + filename + ": " + e);
            System.exit(1);
            // System.exit does not return normally; this throw only satisfies the compiler.
            throw new IllegalStateException(e);
        }
    }

    /**
     * Exits the JVM with status 1 if the first instance of a non-empty list does not carry a
     * {@link FeatureSequence} as its data. Empty lists and a {@code null} first instance pass.
     */
    private static void requireFeatureSequences(InstanceList instances) {
        if (instances.size() > 0 && instances.get(0) != null
                && !(instances.get(0).getData() instanceof FeatureSequence)) {
            System.err.println("Topic modeling currently only supports feature sequences: use --keep-sequence option when importing data.");
            System.exit(1);
        }
    }

    /** Returns a default-constructed generator when {@code --random-seed} is 0, otherwise one seeded with it. */
    private static Randoms newRandoms() {
        return randomSeed.value == 0 ? new Randoms() : new Randoms(randomSeed.value);
    }

    /**
     * Serializes a trained model to a file using Java object serialization.
     *
     * @param model the model to write
     * @param filename the destination file
     * @throws IllegalArgumentException if the file cannot be written; the I/O failure is the cause
     */
    private static void saveModel(Object model, String filename) {
        assert model != null;
        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(filename))) {
            out.writeObject(model);
        } catch (IOException e) {
            throw new IllegalArgumentException("Couldn't write topic model to filename " + filename, e);
        }
    }
}
