package codemining.lm.util;

import codemining.languagetools.ITokenizer;
import codemining.util.parallel.ParallelThreadPool;
import com.google.common.collect.ConcurrentHashMultiset;
import com.google.common.collect.Multiset;
import java.io.File;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Collection;
import java.util.Iterator;
import java.util.Set;
import java.util.logging.Logger;
import org.apache.commons.lang.exception.ExceptionUtils;

/* loaded from: input_file:codemining/lm/util/TokenVocabularyBuilder.class */
/**
 * Static helpers for building a token vocabulary from a collection of source files.
 *
 * <p>Every file is tokenized by its own task, the tokens of all files are counted in a
 * single shared {@link ConcurrentHashMultiset}, and tokens whose count does not exceed a
 * caller-supplied threshold are pruned afterwards.
 *
 * <p>This class only has static members and cannot be instantiated.
 */
public class TokenVocabularyBuilder {
    private static final Logger LOGGER = Logger.getLogger(TokenVocabularyBuilder.class.getName());

    /**
     * Task that tokenizes one file and adds all of its tokens to a shared multiset.
     */
    private static class VocabularyExtractorRunnable implements Runnable {
        /** The file whose tokens are extracted. */
        final File codeFile;
        /** Shared token counter; the same instance is handed to every task. */
        final ConcurrentHashMultiset<String> vocabularySet;
        /** Tokenizer used to turn {@link #codeFile} into tokens. */
        final ITokenizer tokenizer;

        /**
         * @param file the file to tokenize
         * @param concurrentHashMultiset the multiset that receives the file's tokens
         * @param iTokenizer the tokenizer applied to {@code file}
         */
        public VocabularyExtractorRunnable(File file, ConcurrentHashMultiset<String> concurrentHashMultiset, ITokenizer iTokenizer) {
            this.codeFile = file;
            this.vocabularySet = concurrentHashMultiset;
            this.tokenizer = iTokenizer;
        }

        /**
         * Tokenizes the file and adds every token to the shared multiset. An
         * {@link IOException} raised while tokenizing is logged as a warning and the file
         * contributes nothing; it does not abort the task's caller.
         */
        @Override // java.lang.Runnable
        public void run() {
            TokenVocabularyBuilder.LOGGER.finer("Reading file " + this.codeFile.getAbsolutePath());
            try {
                this.vocabularySet.addAll(this.tokenizer.tokenListFromCode(this.codeFile));
            } catch (IOException e) {
                // Best effort: a file that cannot be read is reported and skipped.
                TokenVocabularyBuilder.LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        }
    }

    /**
     * Builds the vocabulary of the given files.
     *
     * <p>One extraction task per file is pushed to a new {@link ParallelThreadPool}; once
     * {@code waitForTermination()} returns, tokens that occur at most {@code i} times in
     * total are pruned.
     *
     * @param collection the files to tokenize
     * @param iTokenizer the tokenizer applied to every file
     * @param i pruning threshold: tokens with a total count {@code <= i} are dropped
     * @return the element set of the token multiset after pruning
     */
    public static Set<String> buildVocabulary(Collection<File> collection, ITokenizer iTokenizer, int i) {
        final ConcurrentHashMultiset<String> vocabulary = ConcurrentHashMultiset.create();
        final ParallelThreadPool threadPool = new ParallelThreadPool();
        for (File codeFile : collection) {
            threadPool.pushTask(new VocabularyExtractorRunnable(codeFile, vocabulary, iTokenizer));
        }
        threadPool.waitForTermination();

        pruneElementsFromMultiset(i, vocabulary);
        LOGGER.info("Vocabulary built, with " + vocabulary.elementSet().size() + " words");
        return vocabulary.elementSet();
    }

    /**
     * Removes from the multiset every element whose count is less than or equal to
     * {@code i}.
     *
     * @param i pruning threshold (inclusive); elements with {@code count <= i} are removed
     * @param concurrentHashMultiset the multiset to prune in place
     */
    public static void pruneElementsFromMultiset(int i, ConcurrentHashMultiset<String> concurrentHashMultiset) {
        // Two passes: collect the candidates first, then remove them, so the entry set is
        // not modified while it is being iterated.
        final Collection<Multiset.Entry<String>> toPrune = new ArrayDeque<Multiset.Entry<String>>();
        for (Multiset.Entry<String> entry : concurrentHashMultiset.entrySet()) {
            if (entry.getCount() <= i) {
                toPrune.add(entry);
            }
        }
        for (Multiset.Entry<String> entry : toPrune) {
            // Remove exactly the number of occurrences observed in the first pass.
            concurrentHashMultiset.remove(entry.getElement(), entry.getCount());
        }
    }

    /** Not instantiable: this class only exposes static utilities. */
    private TokenVocabularyBuilder() {
    }
}
