package codemining.lm.ngram.cache;

import codemining.languagetools.ITokenizer;
import codemining.lm.ILanguageModel;
import codemining.lm.ITokenGeneratingLanguageModel;
import codemining.lm.ngram.AbstractNGramLM;
import codemining.lm.ngram.NGram;
import codemining.lm.ngram.cache.ParameterOptimizer;
import codemining.lm.ngram.cache.SymbolicWeightCache;
import codemining.util.SettingsLoader;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Multiset;
import com.google.common.math.DoubleMath;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.math3.dfp.Dfp;

/* loaded from: input_file:codemining/lm/ngram/cache/IdentifierOnlyCachedNGramLM.class */
public class IdentifierOnlyCachedNGramLM implements ITokenGeneratingLanguageModel<ITokenizer.FullToken> {
    private static final long serialVersionUID = -3795883012836136927L;

    /** Interpolation weight of the cache; the base n-gram model receives {@code 1 - cacheWeight}. */
    private final double cacheWeight;

    /** Decay constant passed to the {@code WeightCache} built by {@link #createCache(String)}. */
    private final double cacheDecayConst;

    /** Matches a trailing lower-case extension (a dot followed by {@code a-z} letters) in a file name. */
    private final Pattern fileExtention = Pattern.compile("\\.[a-z]+$");

    /** N-gram model over token strings; also supplies the tokenizer and the file filter. */
    final AbstractNGramLM baseNgram;

    /** N-gram model over token types, used to estimate how likely the next token is an identifier. */
    final AbstractNGramLM typeNgram;

    /** Token-type string that marks a token as an identifier (the only tokens that are cached). */
    final String identiferNameType;

    protected static final Logger LOGGER = Logger.getLogger(IdentifierOnlyCachedNGramLM.class.getName());

    /**
     * Upper bound on the number of distinct calibration samples collected before parameter
     * optimisation stops reading further files. Read from the settings, defaulting to 5e7.
     */
    public static final int CALIBRATION_SIZE_THRESHOLD = (int) SettingsLoader.getNumericSetting("codemining.lm.ngram.cache.IdentifierOnlyCachedNGramLM.calibrationThreshold", 5.0E7d);

    /**
     * Maximum number of tokens {@link #generateSentence()} emits before it stops sampling.
     * Written as a plain literal: the previous {@code Dfp.RADIX} reference merely happened to
     * carry the same value (10000) and has no semantic link to sentence generation.
     */
    public static int MAX_GENERATED_SENTENCE_LENGTH = 10000;

    /* loaded from: input_file:codemining/lm/ngram/cache/IdentifierOnlyCachedNGramLM$IdentifierOnlyCachedNGramLMDataExtractor.class */
    class IdentifierOnlyCachedNGramLMDataExtractor extends IdentifierOnlyCachedNGramLM {
        private static final long serialVersionUID = 3047560629014010487L;
        final Multiset<ParameterOptimizer.LPair> elements;

        public IdentifierOnlyCachedNGramLMDataExtractor(AbstractNGramLM abstractNGramLM, AbstractNGramLM abstractNGramLM2, String str) {
            super(abstractNGramLM, abstractNGramLM2, str, 0.5d, 0.5d);
            this.elements = HashMultiset.create();
        }

        @Override // codemining.lm.ngram.cache.IdentifierOnlyCachedNGramLM
        protected ICache<String> createCache(String str) {
            return new SymbolicWeightCache(0.5d, str);
        }

        final Multiset<ParameterOptimizer.LPair> getDataParameters(Collection<File> collection) {
            Iterator<File> it = collection.iterator();
            while (it.hasNext()) {
                try {
                    getAbsoluteEntropy(it.next());
                } catch (IOException e) {
                    LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
                }
                if (this.elements.entrySet().size() > CALIBRATION_SIZE_THRESHOLD) {
                    break;
                }
            }
            return this.elements;
        }

        @Override // codemining.lm.ngram.cache.IdentifierOnlyCachedNGramLM
        public double getProbabilityFor(NGram<String> nGram, ICache<String> iCache, double d) {
            double probabilityFor = this.baseNgram.getProbabilityFor(nGram);
            List<SymbolicWeightCache.DecayFactor> decayFactorFor = ((SymbolicWeightCache) iCache).getDecayFactorFor(nGram.get(nGram.size() - 1));
            ParameterOptimizer.LPair lPair = new ParameterOptimizer.LPair();
            lPair.ngramProb = probabilityFor;
            if (decayFactorFor != null) {
                lPair.cacheProb = Lists.newArrayList(decayFactorFor);
            } else {
                lPair.cacheProb = Lists.newArrayList();
            }
            lPair.importance = d;
            Preconditions.checkArgument(probabilityFor > 0.0d && probabilityFor <= 1.0d, "N-gram probablity should be between 0,1 but is " + probabilityFor);
            this.elements.add(lPair);
            return 0.5d;
        }
    }

    /**
     * Builds a model whose cache weight and decay are fitted on calibration files.
     *
     * <p>A data-extractor variant of this model is run over the files to collect samples, a
     * {@link ParameterOptimizer} is optimised on them, and its resulting lambda and decay
     * become this model's cache weight and decay constant.
     *
     * @param abstractNGramLM  token-level n-gram model
     * @param abstractNGramLM2 token-type n-gram model
     * @param str              token type that identifies identifiers
     * @param collection       files used to calibrate the cache parameters
     */
    public IdentifierOnlyCachedNGramLM(AbstractNGramLM abstractNGramLM, AbstractNGramLM abstractNGramLM2, String str, Collection<File> collection) {
        baseNgram = abstractNGramLM;
        typeNgram = abstractNGramLM2;
        identiferNameType = str;

        final IdentifierOnlyCachedNGramLMDataExtractor extractor = new IdentifierOnlyCachedNGramLMDataExtractor(abstractNGramLM, abstractNGramLM2, str);
        final Multiset<ParameterOptimizer.LPair> calibrationSamples = extractor.getDataParameters(collection);

        final ParameterOptimizer optimizer = new ParameterOptimizer(calibrationSamples);
        optimizer.optimizeParameters();

        cacheWeight = optimizer.currentLambda;
        cacheDecayConst = optimizer.decay;
    }

    /**
     * Builds a model with explicitly supplied cache parameters (no calibration).
     *
     * @param abstractNGramLM  token-level n-gram model
     * @param abstractNGramLM2 token-type n-gram model
     * @param str              token type that identifies identifiers
     * @param d                cache interpolation weight
     * @param d2               cache decay constant
     */
    public IdentifierOnlyCachedNGramLM(AbstractNGramLM abstractNGramLM, AbstractNGramLM abstractNGramLM2, String str, double d, double d2) {
        // Cache parameters.
        cacheWeight = d;
        cacheDecayConst = d2;

        // Underlying models and the identifier marker.
        baseNgram = abstractNGramLM;
        typeNgram = abstractNGramLM2;
        identiferNameType = str;
    }

    /**
     * Returns a copy of the given n-gram whose last element is replaced by the identifier
     * token type, i.e. the n-gram that asks "how likely is an identifier in this context?".
     *
     * @param nGram a non-empty n-gram of token types
     * @return a new n-gram with the same context and {@code identiferNameType} as final element
     * @throws IndexOutOfBoundsException if {@code nGram} has no elements
     */
    private NGram<String> constructIdentNgram(NGram<String> nGram) {
        // Typed copy of the elements (replaces the raw ArrayList and manual iterator loop).
        final List<String> elements = Lists.newArrayList(nGram.iterator());
        elements.set(elements.size() - 1, this.identiferNameType);
        return new NGram<>(elements);
    }

    /**
     * Creates the identifier cache used while scoring or generating one token sequence.
     * Subclasses may override this to supply a different cache implementation.
     *
     * @param str string handed to the cache constructor; within this class it is the
     *            extension-less file name when scoring, or the first identifier token when generating
     * @return a new {@link WeightCache} configured with this model's decay constant
     */
    protected ICache<String> createCache(String str) {
        final double decay = this.cacheDecayConst;
        return new WeightCache(decay, str);
    }

    /**
     * Samples a random token sequence from the model.
     *
     * <p>Starting from the sentence-start token, each step first samples the next token type
     * from the type model. If that type is the identifier type, a cache exists, and a random
     * draw falls below the cache weight, the next token is drawn from the cache. Otherwise the
     * next token is sampled from the base model; if the base model yields the unknown symbol
     * and a cache exists, a cached identifier is substituted.
     *
     * <p>The cache is created lazily on the first identifier emitted, so it may still be
     * absent when the base model yields the unknown symbol. In that case the unknown token is
     * emitted as-is (previously this dereferenced a {@code null} cache).
     *
     * <p>Sampling stops when the sentence-end token is drawn or after
     * {@link #MAX_GENERATED_SENTENCE_LENGTH} tokens; the token current at that point is
     * appended last.
     *
     * @return the generated tokens, beginning with the sentence-start token
     */
    @Override // codemining.lm.ITokenGeneratingLanguageModel
    public List<ITokenizer.FullToken> generateSentence() {
        final List<ITokenizer.FullToken> sentence = Lists.newArrayList();
        final List<String> tokens = Lists.newArrayList();
        final List<String> tokenTypes = Lists.newArrayList();

        ITokenizer.FullToken current = new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START);
        final ITokenizer.FullToken sentenceEnd = new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END);

        ICache<String> cache = null;
        int generated = 0;
        while (!current.equals(sentenceEnd) && generated < MAX_GENERATED_SENTENCE_LENGTH) {
            sentence.add(current);
            tokens.add(current.token);
            tokenTypes.add(current.tokenType);
            generated++;

            // Every emitted identifier feeds the cache; the first one creates it.
            if (current.tokenType.equals(this.identiferNameType)) {
                if (cache == null) {
                    cache = createCache(current.token);
                } else {
                    cache.pushElement(current.token);
                }
            }

            final NGram<String> typePrefix = NGram.constructNgramAt(tokenTypes.size() - 1, tokenTypes, this.typeNgram.getN() - 1);
            final boolean identifierSampled = this.typeNgram.pickRandom(typePrefix).equals(this.identiferNameType);

            if (!identifierSampled || Math.random() >= this.cacheWeight || cache == null) {
                final NGram<String> tokenPrefix = NGram.constructNgramAt(tokens.size() - 1, tokens, this.baseNgram.getN() - 1);
                final String sampled = this.baseNgram.pickRandom(tokenPrefix);
                current = this.baseNgram.getTokenizer().getTokenFromString(sampled);
                // Replace UNK by a cached identifier only when there is a cache to draw from.
                if (cache != null && sampled.equals(AbstractNGramLM.UNK_SYMBOL)) {
                    current = new ITokenizer.FullToken(cache.getRandomElement(), this.identiferNameType);
                }
            } else {
                current = new ITokenizer.FullToken(cache.getRandomElement(), this.identiferNameType);
            }
        }

        // Append the terminating token (sentence end, or whatever was current at the length cap).
        sentence.add(current);
        return sentence;
    }

    /**
     * Scores the contents of a file, using its extension-less name to seed the cache.
     *
     * @param file the file to read and score
     * @return the summed base-2 log-probability of the file's tokens
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.lm.ILanguageModel
    public double getAbsoluteEntropy(File file) throws IOException {
        final String contents = FileUtils.readFileToString(file);
        final String nameWithoutExtension = getCurrentFilename(file);
        return getAbsoluteEntropy(contents, nameWithoutExtension);
    }

    /**
     * Scores a code snippet that has no associated file name.
     *
     * @param str the code to score
     * @return the summed base-2 log-probability of the snippet's tokens
     */
    @Override // codemining.lm.ILanguageModel
    public double getAbsoluteEntropy(String str) {
        final String noFilename = "";
        return getAbsoluteEntropy(str, noFilename);
    }

    /**
     * Tokenizes the code with the base model's tokenizer and scores the token sequence.
     *
     * @param str  the code to score
     * @param str2 string used to create the cache (callers pass the extension-less file name, or {@code ""})
     * @return the summed base-2 log-probability of the tokens, or {@code 0} when the code is
     *         empty or yields no tokens
     */
    public double getAbsoluteEntropy(String str, String str2) {
        final char[] code = str.toCharArray();
        if (code.length > 0) {
            final ImmutableList<ITokenizer.FullToken> tokens = ImmutableList.copyOf(this.baseNgram.getTokenizer().getTokenListFromCode(code));
            if (!tokens.isEmpty()) {
                return getLogProbOfSentence(tokens, str2);
            }
        }
        // Nothing to score.
        return 0.0d;
    }

    /**
     * Returns the file's name with a trailing lower-case extension removed.
     *
     * @param file the file whose name is wanted
     * @return the name shortened by the length of the matched extension, or the full name when
     *         the extension pattern does not match
     */
    private String getCurrentFilename(File file) {
        final String name = file.getName();
        final Matcher extension = this.fileExtention.matcher(name);
        if (!extension.find()) {
            return name.substring(0, name.length());
        }
        // Trim by the match length, counted from the end of the name.
        final int extensionLength = extension.group().length();
        return name.substring(0, name.length() - extensionLength);
    }

    /**
     * Computes the per-token score of a file, using its extension-less name to seed the cache.
     *
     * @param file the file to read and score
     * @return the summed base-2 log-probability divided by the token count minus one
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.lm.ILanguageModel
    public double getExtrinsticEntropy(File file) throws IOException {
        final String contents = FileUtils.readFileToString(file);
        final String nameWithoutExtension = getCurrentFilename(file);
        return getExtrinsticEntropy(contents, nameWithoutExtension);
    }

    /**
     * Computes the per-token score of a code snippet that has no associated file name.
     *
     * @param str the code to score
     * @return the summed base-2 log-probability divided by the token count minus one
     */
    @Override // codemining.lm.ILanguageModel
    public double getExtrinsticEntropy(String str) {
        final String noFilename = "";
        return getExtrinsticEntropy(str, noFilename);
    }

    /**
     * Tokenizes the code and returns its summed base-2 log-probability normalised by the
     * number of tokens minus one.
     *
     * @param str  the code to score
     * @param str2 string used to create the cache (callers pass the extension-less file name, or {@code ""})
     * @return the normalised score, or {@code 0} when the code is empty or tokenizes to fewer
     *         than two tokens
     */
    public double getExtrinsticEntropy(String str, String str2) {
        final char[] code = str.toCharArray();
        if (code.length == 0) {
            return 0.0d;
        }
        final ImmutableList<ITokenizer.FullToken> tokens = ImmutableList.copyOf(this.baseNgram.getTokenizer().getTokenListFromCode(code));
        // With a single token the divisor below would be zero and the result NaN or infinite;
        // treat that like the empty case.
        if (tokens.size() <= 1) {
            return 0.0d;
        }
        return getLogProbOfSentence(tokens, str2) / (tokens.size() - 1);
    }

    /**
     * Returns this same instance: the training methods of this class reject modification, so
     * no separate immutable copy is made.
     *
     * @return {@code this}
     */
    @Override // codemining.lm.ILanguageModel
    public ILanguageModel getImmutableVersion() {
        return this;
    }

    /**
     * Sums the base-2 log-probabilities of every token in the sequence under the mixture model.
     *
     * <p>For each position the type model gives the probability {@code p} that an identifier
     * occurs there. The token's probability is {@code p} times the cache-interpolated
     * probability plus {@code (1 - p)} times the base model's probability. Positions whose
     * token n-gram has a single element contribute nothing to the sum. After scoring a
     * position, its token is pushed into the cache if its type is the identifier type.
     *
     * @param list the tokens to score
     * @param str  string used to create the cache
     * @return the summed base-2 log-probability
     * @throws IllegalArgumentException if a token's mixture probability is not positive or is infinite
     */
    private double getLogProbOfSentence(List<ITokenizer.FullToken> list, String str) {
        final ICache<String> identifierCache = createCache(str);

        // Split the sequence into parallel lists of token strings and token types.
        final List<String> tokens = Lists.newArrayList();
        final List<String> tokenTypes = Lists.newArrayList();
        for (final ITokenizer.FullToken fullToken : list) {
            tokens.add(fullToken.token);
            tokenTypes.add(fullToken.tokenType);
        }

        double logProbability = 0.0d;
        for (int position = 0; position < list.size(); position++) {
            final NGram<String> typeNgramHere = NGram.constructNgramAt(position, tokenTypes, this.typeNgram.getN());
            final NGram<String> tokenNgramHere = NGram.constructNgramAt(position, tokens, this.baseNgram.getN());
            final double identifierProbability = this.typeNgram.getProbabilityFor(constructIdentNgram(typeNgramHere));

            if (tokenNgramHere.size() > 1) {
                // Evaluate the cached part first, then the base part.
                final double cachedPart = identifierProbability * getProbabilityFor(tokenNgramHere, identifierCache, identifierProbability);
                final double basePart = (1.0d - identifierProbability) * this.baseNgram.getProbabilityFor(tokenNgramHere);
                final double tokenProbability = cachedPart + basePart;
                Preconditions.checkArgument(tokenProbability > 0.0d);
                Preconditions.checkArgument(!Double.isInfinite(tokenProbability));
                logProbability += DoubleMath.log2(tokenProbability);
            }

            // Only after scoring: remember the token if it is an identifier.
            final String typeHere = typeNgramHere.get(typeNgramHere.size() - 1);
            if (typeHere.equals(this.identiferNameType)) {
                identifierCache.pushElement(tokenNgramHere.get(tokenNgramHere.size() - 1));
            }
        }
        return logProbability;
    }

    /**
     * Interpolates the base model's probability of the n-gram with the cache's probability of
     * the n-gram's last token, weighting the cache by the cache weight.
     *
     * @param nGram  token n-gram whose last element is the token being predicted
     * @param iCache cache to query for the last token
     * @param d      not used by this implementation
     * @return {@code (1 - cacheWeight) * base + cacheWeight * cache}
     */
    public double getProbabilityFor(NGram<String> nGram, ICache<String> iCache, double d) {
        final double baseProbability = this.baseNgram.getProbabilityFor(nGram);
        final double baseTerm = (1.0d - this.cacheWeight) * baseProbability;

        final String predictedToken = nGram.get(nGram.size() - 1);
        final double cacheTerm = this.cacheWeight * iCache.getProbabilityFor(predictedToken);

        return baseTerm + cacheTerm;
    }

    /**
     * Returns the tokenizer of the base n-gram model.
     *
     * @return the base model's tokenizer
     */
    @Override // codemining.lm.ITokenGeneratingLanguageModel
    public ITokenizer getTokenizer() {
        final AbstractNGramLM delegate = this.baseNgram;
        return delegate.getTokenizer();
    }

    /**
     * Returns the file filter of the base n-gram model.
     *
     * @return the base model's file filter
     */
    @Override // codemining.lm.ILanguageModel
    public AbstractFileFilter modelledFilesFilter() {
        final AbstractNGramLM delegate = this.baseNgram;
        return delegate.modelledFilesFilter();
    }

    /**
     * Not supported: this model cannot be trained further.
     *
     * @param collection ignored
     * @throws UnsupportedOperationException always
     */
    @Override // codemining.lm.ILanguageModel
    public void trainIncrementalModel(Collection<File> collection) throws IOException {
        // The message names this class; it previously named a different one ("CachedNGramLM").
        throw new UnsupportedOperationException("IdentifierOnlyCachedNGramLM is an immutable Language Model");
    }

    /**
     * Not supported: this model cannot be trained.
     *
     * @param collection ignored
     * @throws UnsupportedOperationException always
     */
    @Override // codemining.lm.ILanguageModel
    public void trainModel(Collection<File> collection) throws IOException {
        // The message names this class; it previously named a different one ("CachedNGramLM").
        throw new UnsupportedOperationException("IdentifierOnlyCachedNGramLM is an immutable Language Model");
    }
}
