package codemining.lm.hmm;

import codemining.languagetools.ITokenizer;
import codemining.lm.ILanguageModel;
import codemining.lm.ITokenGeneratingLanguageModel;
import codemining.lm.util.TokenVocabularyBuilder;
import codemining.lm.util.VocabularyToInt;
import codemining.util.SettingsLoader;
import com.google.common.collect.Lists;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.lang.exception.ExceptionUtils;

/* loaded from: input_file:codemining/lm/hmm/HiddenMarkovModelLM.class */
/**
 * Language model that trains an {@link HMM} on integer-encoded token sequences.
 *
 * <p>Only {@link #trainModel(Collection)} does real work in this class: it builds a vocabulary,
 * converts every training file to a sequence of ints through a {@link VocabularyToInt} mapper and
 * trains the HMM on the concatenation of those sequences. The entropy and sentence-generation
 * methods return fixed values and look like unimplemented placeholders.
 */
public class HiddenMarkovModelLM implements ITokenGeneratingLanguageModel<ITokenizer.FullToken> {
    private static final long serialVersionUID = 4431685854743499177L;

    /** Tokenizer handed to the vocabulary builder and mapper; also supplies the file filter. */
    final ITokenizer tokenizer;

    /** Token-to-int mapper; assigned by {@link #trainModel(Collection)}, {@code null} before that. */
    VocabularyToInt vocabularyMapper;

    /** The underlying model; assigned by {@link #trainModel(Collection)}, {@code null} before that. */
    HMM hmm;

    private static final Logger LOGGER = Logger.getLogger(HiddenMarkovModelLM.class.getName());

    /**
     * Threshold passed to {@code TokenVocabularyBuilder.buildVocabulary}, read from the
     * {@code "CleanVocabularyThreshold"} setting and truncated to an int.
     * NOTE(review): the 10.0 argument is presumably the fallback value - confirm in SettingsLoader.
     */
    public static final int CLEAN_VOCABULARY_THRESHOLD = (int) SettingsLoader.getNumericSetting("CleanVocabularyThreshold", 10.0d);

    /**
     * Iteration count passed to {@code HMM.train}, read from the {@code "iterations"} setting and
     * truncated to an int.
     * NOTE(review): the 100.0 argument is presumably the fallback value - confirm in SettingsLoader.
     */
    private static final int NUM_ITERATIONS = (int) SettingsLoader.getNumericSetting("iterations", 100.0d);

    /**
     * Creates an untrained model.
     *
     * @param iTokenizer the tokenizer to use; stored as-is, no null check is performed
     */
    public HiddenMarkovModelLM(ITokenizer iTokenizer) {
        this.tokenizer = iTokenizer;
    }

    /**
     * Placeholder: no sentence is generated.
     *
     * @return always {@code null}; callers must be prepared for that
     */
    @Override // codemining.lm.ITokenGeneratingLanguageModel
    public List<ITokenizer.FullToken> generateSentence() {
        return null;
    }

    /**
     * Placeholder: the file is never read.
     *
     * @param file ignored
     * @return always {@code 0.0}
     * @throws IOException declared by the signature; never thrown by this implementation
     */
    @Override // codemining.lm.ILanguageModel
    public double getAbsoluteEntropy(File file) throws IOException {
        return 0.0d;
    }

    /**
     * Placeholder: the text is never inspected.
     *
     * @param str ignored
     * @return always {@code 0.0}
     */
    @Override // codemining.lm.ILanguageModel
    public double getAbsoluteEntropy(String str) {
        return 0.0d;
    }

    /**
     * Placeholder: the file is never read.
     *
     * @param file ignored
     * @return always {@code 0.0}
     * @throws IOException declared by the signature; never thrown by this implementation
     */
    @Override // codemining.lm.ILanguageModel
    public double getExtrinsticEntropy(File file) throws IOException {
        return 0.0d;
    }

    /**
     * Placeholder: the text is never inspected.
     *
     * @param str ignored
     * @return always {@code 0.0}
     */
    @Override // codemining.lm.ILanguageModel
    public double getExtrinsticEntropy(String str) {
        return 0.0d;
    }

    /**
     * Returns this same instance; no copy is made, so later training still changes the returned
     * object.
     *
     * @return {@code this}
     */
    @Override // codemining.lm.ILanguageModel
    public ILanguageModel getImmutableVersion() {
        return this;
    }

    /**
     * @return the tokenizer supplied to the constructor
     */
    @Override // codemining.lm.ITokenGeneratingLanguageModel
    public ITokenizer getTokenizer() {
        return this.tokenizer;
    }

    /**
     * @return the file filter reported by the tokenizer
     */
    @Override // codemining.lm.ILanguageModel
    public AbstractFileFilter modelledFilesFilter() {
        return this.tokenizer.getFileFilter();
    }

    /**
     * Incremental training is not supported.
     *
     * @param collection ignored
     * @throws UnsupportedOperationException always
     */
    @Override // codemining.lm.ILanguageModel
    public void trainIncrementalModel(Collection<File> collection) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Trains a fresh HMM on the given files, replacing any previously assigned mapper and model.
     *
     * <p>Files that cannot be converted to an int sequence are logged and skipped; the remaining
     * files are concatenated, in iteration order, into one observation sequence.
     *
     * @param collection the training files
     * @throws IOException declared by the signature; presumably raised by vocabulary building,
     *     since per-file failures while reading sequences are caught and logged
     */
    @Override // codemining.lm.ILanguageModel
    public void trainModel(Collection<File> collection) throws IOException {
        Set<String> vocabulary = TokenVocabularyBuilder.buildVocabulary(collection, this.tokenizer, CLEAN_VOCABULARY_THRESHOLD);
        this.vocabularyMapper = new VocabularyToInt(this.tokenizer, vocabulary);
        LOGGER.info("Vocabulary Built. Reading files...");
        // NOTE(review): the literal 100 is presumably the number of hidden states - confirm
        // against the HMM constructor before turning it into a named constant or a setting.
        this.hmm = new HMM(100, vocabulary.size());
        List<Integer> tokenIds = readTokenIds(collection);
        LOGGER.info("Sequences stored. Stating " + NUM_ITERATIONS + " iterations");
        this.hmm.train(toIntArray(tokenIds), NUM_ITERATIONS);
    }

    /** Concatenates the int sequences of all files, logging and skipping files that fail. */
    private List<Integer> readTokenIds(Collection<File> files) {
        List<Integer> tokenIds = Lists.newArrayList();
        for (File file : files) {
            try {
                for (int tokenId : this.vocabularyMapper.fileToIntSequence(file)) {
                    tokenIds.add(tokenId);
                }
            } catch (Throwable th) {
                // Deliberately broad: one unreadable file must not abort the whole training run.
                // Ids appended before the failure stay in the list.
                LOGGER.warning("Failed to add file " + file.getAbsolutePath() + " " + ExceptionUtils.getFullStackTrace(th));
            }
        }
        return tokenIds;
    }

    /** Unboxes a list of Integers into a primitive array of the same length and order. */
    private static int[] toIntArray(List<Integer> values) {
        int[] result = new int[values.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = values.get(i);
        }
        return result;
    }
}
