package codemining.java.codedataextractors;

import codemining.lm.grammar.IGrammarRuleProducer;
import codemining.lm.grammar.cfg.AbstractGrammarRuleSet;
import codemining.lm.grammar.cfg.CFGrammarRule;
import codemining.lm.grammar.cfg.GrammarRuleSet;
import codemining.util.serialization.ISerializationStrategy;
import com.google.common.base.Objects;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Sets;
import com.google.common.math.DoubleMath;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.logging.Logger;

/* JADX WARN: Classes with same name are omitted:
  input_file:lib/naturalize.jar:codemining/java/codedataextractors/TfIdfCode.class
 */
/* loaded from: input_file:naturalize.jar:codemining/java/codedataextractors/TfIdfCode.class */
/**
 * Scores the grammar rules found in a single source file with a TF-IDF style
 * weight: the rule's frequency inside the file multiplied by the negative
 * base-2 logarithm of its frequency in a reference rule set.
 */
public class TfIdfCode {
    private static final Logger LOGGER = Logger.getLogger(TfIdfCode.class.getName());

    /** Reference rule set providing the corpus-wide frequency of each rule. */
    private final AbstractGrammarRuleSet rules;

    /** Producer used to extract grammar rules from a file; taken from {@link #rules}. */
    final IGrammarRuleProducer rProducer;

    /**
     * A grammar rule paired with its score for one file. The natural ordering
     * places higher scores first.
     */
    public static class Rule implements Comparable<Rule> {
        /** Number of times the rule occurs in the scored file. */
        public long countInDocument;

        /** The TF-IDF style score of the rule for the scored file. */
        public double tfidfScore;

        /** The grammar rule being scored. */
        public CFGrammarRule cfgRule;

        private Rule() {
            this.countInDocument = 0L;
            this.tfidfScore = 0.0d;
        }

        /**
         * Orders rules by descending score. Ties are broken on the string form
         * of the rule so that distinct rules with identical scores are not
         * collapsed into one element when stored in a sorted set.
         * NOTE(review): the tie-break assumes CFGrammarRule.toString() differs
         * for different rules - confirm against that class.
         */
        @Override // java.lang.Comparable
        public int compareTo(Rule rule) {
            final int byScore = Double.compare(rule.tfidfScore, this.tfidfScore);
            if (byScore != 0) {
                return byScore;
            }
            return String.valueOf(this.cfgRule).compareTo(String.valueOf(rule.cfgRule));
        }

        /** Two rules are equal when both their score and their grammar rule match. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof Rule)) {
                return false;
            }
            final Rule other = (Rule) obj;
            return Double.compare(other.tfidfScore, this.tfidfScore) == 0
                    && Objects.equal(this.cfgRule, other.cfgRule);
        }

        @Override
        public int hashCode() {
            return Objects.hashCode(Double.valueOf(this.tfidfScore), this.cfgRule);
        }

        /** Renders the rule followed by its score; a missing rule prints as "null". */
        @Override
        public String toString() {
            return String.valueOf(this.cfgRule) + " tfidf:" + this.tfidfScore;
        }

        /**
         * Accessor constructor present in the decompiled class; the argument is
         * ignored. Kept so the set of declared constructors is unchanged.
         */
        /* synthetic */ Rule(Rule rule) {
            this();
        }
    }

    /**
     * Command-line entry point: prints the 50 best-scoring rules of a file.
     *
     * @param strArr {@code strArr[0]} is the path of the serialized reference
     *               rule set, {@code strArr[1]} is the file to score
     */
    public static void main(String[] strArr) throws ClassNotFoundException, ISerializationStrategy.SerializationException, IOException {
        if (strArr == null || strArr.length < 2) {
            // Without both paths there is nothing to load; report usage instead of
            // failing with an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: <serializedGrammarRuleSet> <fileToScore>");
            return;
        }
        final TfIdfCode scorer = new TfIdfCode(AbstractGrammarRuleSet.readFromSerialized(strArr[0]));
        printTopNterms(scorer.getTfIdfForFile(new File(strArr[1])), 50);
    }

    /**
     * Prints up to {@code i} rules to standard output, best score first.
     * Fewer lines are printed when the set holds fewer than {@code i} rules.
     *
     * @param sortedSet the scored rules; the set itself is not modified
     * @param i         the maximum number of rules to print
     */
    public static void printTopNterms(SortedSet<Rule> sortedSet, int i) {
        // Re-sort a copy by natural ordering so the output order does not depend
        // on whatever comparator the caller's set was built with.
        final TreeSet<Rule> ranked = Sets.newTreeSet(sortedSet);
        int printed = 0;
        for (final Rule rule : ranked) {
            if (printed >= i) {
                break;
            }
            System.out.println(rule);
            printed++;
        }
    }

    /**
     * @param abstractGrammarRuleSet the reference rule set; its grammar format is
     *                               also used to extract rules from scored files
     */
    public TfIdfCode(AbstractGrammarRuleSet abstractGrammarRuleSet) {
        this.rules = abstractGrammarRuleSet;
        this.rProducer = abstractGrammarRuleSet.getGrammarFormat();
    }

    /**
     * Scores every distinct grammar rule extracted from {@code file}.
     *
     * @param file the file to extract grammar rules from
     * @return the distinct rules of the file, ordered by descending score
     * @throws IOException declared for failures while reading the file
     */
    public SortedSet<Rule> getTfIdfForFile(File file) throws IOException {
        final List<CFGrammarRule> fileRules = this.rProducer.getGrammarRulesFromFile(file);

        // Occurrence counts per distinct rule in this file.
        final HashMultiset<CFGrammarRule> occurrences = HashMultiset.create();
        occurrences.addAll(fileRules);

        // Rule set built from this file alone, giving the in-file frequency.
        final GrammarRuleSet fileRuleSet = new GrammarRuleSet(this.rProducer);
        fileRuleSet.addRules(fileRules);

        final TreeSet<Rule> scored = Sets.newTreeSet();
        for (final CFGrammarRule cfgRule : occurrences.elementSet()) {
            final Rule rule = new Rule();
            rule.cfgRule = cfgRule;
            rule.countInDocument = occurrences.count(cfgRule);
            // NOTE(review): if the reference set reports a frequency of 0 for a
            // rule, log2 is taken of 0 - confirm how getRuleMLFrequency treats
            // rules it has not seen.
            final double corpusFrequency = this.rules.getRuleMLFrequency(cfgRule);
            rule.tfidfScore = fileRuleSet.getRuleMLFrequency(cfgRule) * (-DoubleMath.log2(corpusFrequency));
            scored.add(rule);
        }
        return scored;
    }
}
