package codemining.java.codedataextractors;

import codemining.java.codeutils.JavaCodeTokenizer;
import codemining.languagetools.ITokenizer;
import codemining.lm.ITokenGeneratingLanguageModel;
import codemining.util.serialization.ISerializationStrategy;
import codemining.util.serialization.Serializer;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.DirectoryFileFilter;
import org.apache.commons.lang.exception.ExceptionUtils;

/* JADX WARN: Classes with same name are omitted:
  input_file:lib/naturalize.jar:codemining/java/codedataextractors/LanguageBurstiness.class
 */
/* loaded from: input_file:naturalize.jar:codemining/java/codedataextractors/LanguageBurstiness.class */
/**
 * Collects statistics on how often identifier tokens repeat inside a token
 * sequence and how far apart the repetitions are.
 *
 * <p>For every token sequence that is added, each distinct identifier is
 * reduced to its number of occurrences and the summed distance between its
 * consecutive occurrences. The results are aggregated in {@link #data}, keyed
 * by occurrence count, and can be printed as CSV with
 * {@link #printOccurences()}.
 */
public class LanguageBurstiness {
    protected static final Logger LOGGER = Logger.getLogger(LanguageBurstiness.class.getName());

    /** Command line help printed whenever the arguments cannot be used. */
    private static final String USAGE = "Usage [empirical|generated] dataFolder|model numOfFileToGen";

    /** Aggregated statistics, keyed by the number of occurrences of a word (sorted ascending). */
    final Map<Integer, Occurences> data = Maps.newTreeMap();

    /**
     * Aggregate for all words that share the same occurrence count.
     */
    public static class Occurences {
        /** Number of (word, token sequence) pairs that had this occurrence count. */
        int frequency;

        /** Sum over those pairs of {@code totalLengthBetween / occurences}. */
        double totalAvgLength;

        private Occurences() {
            this.frequency = 0;
            this.totalAvgLength = 0.0d;
        }

        /**
         * Accessor constructor surfaced by decompilation; the argument is
         * ignored. Retained so existing package-level callers keep compiling.
         */
        Occurences(Occurences occurences) {
            this();
        }
    }

    /**
     * Running per-word bookkeeping while scanning a single token sequence.
     */
    public static class WordPosition {
        /** How many times the word has been seen so far. */
        int occurences;

        /** Index in the token list of the most recent occurrence. */
        int lastPosition;

        /** Sum of index distances between consecutive occurrences. */
        int totalLengthBetween;

        private WordPosition() {
            this.occurences = 0;
            this.lastPosition = 0;
            this.totalLengthBetween = 0;
        }

        /**
         * Accessor constructor surfaced by decompilation; the argument is
         * ignored. Retained so existing package-level callers keep compiling.
         */
        WordPosition(WordPosition wordPosition) {
            this();
        }
    }

    /**
     * Command line entry point.
     *
     * <ul>
     *   <li>{@code empirical <dataFolder>} tokenizes the files found under the folder;</li>
     *   <li>{@code generated <model> <numOfFileToGen>} samples sentences from a
     *       deserialized language model.</li>
     * </ul>
     *
     * The usage message is printed to stderr (and nothing else happens) when
     * the mode is unknown or an argument required by the mode is missing.
     *
     * @param strArr the command line arguments
     * @throws NumberFormatException if {@code numOfFileToGen} is not an integer
     */
    public static void main(String[] strArr) throws ClassNotFoundException, ISerializationStrategy.SerializationException, FileNotFoundException, Exception {
        if (strArr.length < 2) {
            System.err.println(USAGE);
            return;
        }
        final String mode = strArr[0];
        final LanguageBurstiness burstiness = new LanguageBurstiness();
        if ("empirical".equals(mode)) {
            burstiness.getEmpirical(new JavaCodeTokenizer(), strArr[1]);
        } else if ("generated".equals(mode)) {
            // This mode needs a third argument; fail with the usage text
            // instead of an ArrayIndexOutOfBoundsException.
            if (strArr.length < 3) {
                System.err.println(USAGE);
                return;
            }
            burstiness.getGenerated(strArr[1], Integer.parseInt(strArr[2]));
        } else {
            System.err.println(USAGE);
            return;
        }
        burstiness.printOccurences();
    }

    /**
     * Folds the word statistics of one token sequence into {@link #data}.
     *
     * @param tokens the token sequence to analyse
     * @param identifierType only tokens whose {@code tokenType} equals this value are counted
     */
    void addFileCounts(List<ITokenizer.FullToken> tokens, String identifierType) {
        for (Map.Entry<String, WordPosition> entry : extractWordData(tokens, identifierType).entrySet()) {
            final WordPosition position = entry.getValue();
            final Integer count = Integer.valueOf(position.occurences);

            Occurences bucket = this.data.get(count);
            if (bucket == null) {
                bucket = new Occurences();
                this.data.put(count, bucket);
            }
            bucket.frequency++;
            // Divide in floating point: an int/int division would truncate the
            // per-word average before it is accumulated into a double.
            bucket.totalAvgLength += (double) position.totalLengthBetween / position.occurences;
        }
    }

    /**
     * Scans the token list once and records, for every distinct token of the
     * requested type, its occurrence count and the summed distance between
     * consecutive occurrences.
     *
     * @param tokens the token sequence to scan
     * @param identifierType the token type to keep
     * @return a map, sorted by token text, from token to its position statistics
     */
    private Map<String, WordPosition> extractWordData(List<ITokenizer.FullToken> tokens, String identifierType) {
        final Map<String, WordPosition> positions = new TreeMap<String, WordPosition>();
        for (int index = 0; index < tokens.size(); index++) {
            final ITokenizer.FullToken current = tokens.get(index);
            if (!current.tokenType.equals(identifierType)) {
                continue;
            }
            WordPosition seen = positions.get(current.token);
            if (seen == null) {
                // First sighting: there is no gap to record yet.
                seen = new WordPosition();
                seen.occurences = 1;
                positions.put(current.token, seen);
            } else {
                seen.occurences++;
                seen.totalLengthBetween += index - seen.lastPosition;
            }
            seen.lastPosition = index;
        }
        return positions;
    }

    /**
     * Tokenizes every file under {@code directory} accepted by the tokenizer's
     * file filter (recursing into sub-directories) and adds its counts. Files
     * that cannot be read are logged and skipped.
     *
     * @param iTokenizer the tokenizer used to select and tokenize files
     * @param directory the root folder to search
     */
    private void getEmpirical(ITokenizer iTokenizer, String directory) {
        final String identifierType = iTokenizer.getIdentifierType();
        final Iterator<?> files = FileUtils.listFiles(new File(directory), iTokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY).iterator();
        while (files.hasNext()) {
            final File sourceFile = (File) files.next();
            try {
                final char[] code = FileUtils.readFileToString(sourceFile).toCharArray();
                addFileCounts(iTokenizer.getTokenListFromCode(code), identifierType);
            } catch (IOException e) {
                // Best effort: one unreadable file must not abort the whole run.
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        }
    }

    /**
     * Deserializes a token-generating language model and adds the counts of
     * {@code sentenceCount} generated sentences.
     *
     * @param modelPath location of the serialized model
     * @param sentenceCount how many sentences to generate; values below 1 generate nothing
     */
    private void getGenerated(String modelPath, int sentenceCount) throws FileNotFoundException, ClassNotFoundException, ISerializationStrategy.SerializationException {
        final ITokenGeneratingLanguageModel model = (ITokenGeneratingLanguageModel) Serializer.getSerializer().deserializeFrom(modelPath);
        for (int generated = 0; generated < sentenceCount; generated++) {
            addFileCounts(model.generateSentence(), model.getTokenizer().getIdentifierType());
        }
    }

    /**
     * Prints one CSV line per occurrence count to stdout:
     * {@code occurrenceCount,totalAvgLength/frequency,frequency}.
     */
    void printOccurences() {
        for (Map.Entry<Integer, Occurences> entry : this.data.entrySet()) {
            final Occurences bucket = entry.getValue();
            System.out.println(entry.getKey() + "," + (bucket.totalAvgLength / bucket.frequency) + "," + bucket.frequency);
        }
    }
}
