package codemining.lm.util;

import codemining.languagetools.ITokenizer;
import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.math.LongMath;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:codemining/lm/util/VocabularyToInt.class */
/**
 * Maps every token of a fixed vocabulary to a distinct {@code int} id and
 * converts source code into sequences of those ids.
 *
 * <p>{@link Integer#MIN_VALUE} is reserved for {@link #UNK_SYMBOL}; vocabulary
 * tokens receive consecutive ids starting at {@code Integer.MIN_VALUE + 1}, in
 * the iteration order of the set passed to the constructor. Tokens that are
 * not in the alphabet are encoded with the id stored under
 * {@link #UNK_SYMBOL}.
 */
public class VocabularyToInt {
    /** Alphabet key whose id is used for every out-of-vocabulary token. */
    public static final String UNK_SYMBOL = "UNK_SYMBOL";

    /** Tokenizer used to split code into tokens before the id lookup. */
    final ITokenizer tokenizer;

    /** Next id to hand out to a vocabulary token. */
    private int nextId;

    /** Bidirectional token &lt;-&gt; id mapping. */
    private final BiMap<String, Integer> alphabet = HashBiMap.create();

    /**
     * Builds the alphabet for the given vocabulary.
     *
     * @param iTokenizer tokenizer used later by {@link #codeToIntSequence(String)}
     * @param set the vocabulary; every element is assigned its own id
     * @throws IllegalArgumentException if the vocabulary has more elements
     *     than there are ids available
     */
    public VocabularyToInt(ITokenizer iTokenizer, Set<String> set) {
        this.tokenizer = iTokenizer;
        this.alphabet.put(UNK_SYMBOL, Integer.MIN_VALUE);
        this.nextId = Integer.MIN_VALUE + 1;
        assignIdsToVocabulary(set);
    }

    /**
     * Assigns consecutive ids, starting at the current {@code nextId}, to the
     * elements of {@code set}.
     *
     * @param set the tokens to register in the alphabet
     * @throws IllegalArgumentException if {@code set} is larger than the
     *     number of available ids
     */
    private synchronized void assignIdsToVocabulary(Set<String> set) {
        // Ids range over [Integer.MIN_VALUE + 1, Integer.MAX_VALUE], i.e.
        // MAX_VALUE - MIN_VALUE = 2^32 - 1 values. The subtraction must be
        // done on longs: the previous bound evaluated to -1, which made this
        // precondition fail for every vocabulary, including the empty one.
        final long availableIds = LongMath.checkedSubtract(Integer.MAX_VALUE, Integer.MIN_VALUE);
        Preconditions.checkArgument(((long) set.size()) <= availableIds, "Too large vocabulary. It cannot fit in an int. Consider pruning more");
        for (final String token : set) {
            this.alphabet.put(token, Integer.valueOf(this.nextId));
            this.nextId++;
        }
    }

    /**
     * Tokenizes {@code str} and translates each token to its id.
     *
     * @param str the code to convert
     * @return one id per token, in token order; tokens missing from the
     *     alphabet are mapped to the id stored under {@link #UNK_SYMBOL}
     */
    public int[] codeToIntSequence(String str) {
        final List<ITokenizer.FullToken> tokens = this.tokenizer.getTokenListFromCode(str.toCharArray());
        // Looked up once per call; kept boxed so it is only unboxed when an
        // unknown token is actually encountered, exactly as before.
        final Integer unknownId = this.alphabet.get(UNK_SYMBOL);
        final int[] ids = new int[tokens.size()];
        for (int i = 0; i < ids.length; i++) {
            final Integer id = this.alphabet.get(tokens.get(i).token);
            if (id != null) {
                ids[i] = id.intValue();
            } else {
                ids[i] = unknownId.intValue();
            }
        }
        return ids;
    }

    /**
     * Reads {@code file} and converts its content with
     * {@link #codeToIntSequence(String)}.
     *
     * @param file the file to read
     * @return the id sequence of the file's tokens
     * @throws IOException if the file cannot be read
     */
    public int[] fileToIntSequence(File file) throws IOException {
        // NOTE(review): the single-argument readFileToString overload takes no
        // charset, so decoding presumably follows the platform default -
        // confirm whether an explicit charset is wanted here.
        return codeToIntSequence(FileUtils.readFileToString(file));
    }

    /**
     * Returns the token &lt;-&gt; id mapping.
     *
     * @return the internal alphabet itself, not a copy; changes made through
     *     the returned map are visible to this object
     */
    public BiMap<String, Integer> getAlphabet() {
        return this.alphabet;
    }
}
