package codemining.langs.codeutils;

import codemining.languagetools.ITokenizer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.threecrickets.jygments.ResolutionException;
import com.threecrickets.jygments.grammar.Lexer;
import com.threecrickets.jygments.grammar.Token;
import com.threecrickets.jygments.grammar.TokenType;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.eclipse.core.runtime.internal.adaptor.EclipseCommandProvider;
import org.eclipse.osgi.internal.baseadaptor.BaseStorageHook;

/* loaded from: input_file:codemining/langs/codeutils/AbstractJygmentsTokenizer.class */
/**
 * Base class for tokenizers that delegate lexing to a Jygments {@link Lexer}.
 *
 * <p>The lexer is looked up from a file-name suffix; the same suffix is used to
 * build the file filter returned by {@link #getFileFilter()}. Subclasses supply
 * the string representation of each token via {@link #getTokenString(Token)}.
 */
public abstract class AbstractJygmentsTokenizer implements ITokenizer {
    private static final long serialVersionUID = 8826779180772076954L;

    /** Lexer resolved in the constructor; used by every tokenizing method. */
    final Lexer lexer;

    /** Filter accepting names that end in {@code .<suffix>}. */
    private final RegexFileFilter codeFilter;

    /**
     * Creates a tokenizer for files with the given suffix.
     *
     * @param str the file-name suffix without the leading dot. It is inserted
     *     into the filter's regular expression as-is (not escaped).
     * @throws ResolutionException propagated from {@link Lexer#getForFileName}
     */
    public AbstractJygmentsTokenizer(String str) throws ResolutionException {
        this.lexer = Lexer.getForFileName("sample." + str);
        // "$" anchors the match at the end of the name.
        this.codeFilter = new RegexFileFilter(".*\\." + str + "$");
    }

    /**
     * Returns the filter built from the suffix passed to the constructor.
     */
    @Override // codemining.languagetools.ITokenizer
    public AbstractFileFilter getFileFilter() {
        return this.codeFilter;
    }

    /**
     * Reads the whole file and delegates to the {@code char[]} overload.
     *
     * <p>The file is read with {@code FileUtils.readFileToString(File)}, i.e.
     * without an explicit charset.
     *
     * @param file the file to tokenize
     * @return whatever the {@code char[]} overload returns for the file content
     * @throws IOException if reading the file fails
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(File file) throws IOException {
        return getTokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /**
     * Returns the string emitted for a lexer token that was not filtered out.
     *
     * @param token a token produced by {@link #lexer}
     * @return the string to place in the resulting token list or map
     */
    public abstract String getTokenString(Token token);

    /**
     * Tells whether a token is to be skipped by the tokenizing methods.
     *
     * <p>NOTE: despite its name, this returns {@code true} for the tokens that
     * are <em>dropped</em>: every caller in this class emits a token only when
     * this method returns {@code false}. Skipped tokens are those whose type is
     * one of the comment types or {@code Text}, and those whose value is exactly
     * one space, one newline or one tab.
     *
     * @param token the token to classify
     * @return {@code true} if the token is excluded from the output
     */
    public boolean isProgramToken(Token token) {
        TokenType type = token.getType();
        if (type == TokenType.Comment
                || type == TokenType.Comment_Multiline
                || type == TokenType.Comment_Single
                || type == TokenType.Comment_Special
                || type == TokenType.Comment_Preproc
                || type == TokenType.Text) {
            return true;
        }
        // Literal-first comparison so a null value is simply "not whitespace".
        String value = token.getValue();
        return " ".equals(value) || "\n".equals(value) || "\t".equals(value);
    }

    /**
     * Lexes the code and returns the retained tokens as strings, wrapped in
     * {@code SENTENCE_START} / {@code SENTENCE_END}.
     *
     * @param cArr the code to tokenize
     * @return a new list: start marker, one entry per retained token in lexer
     *     order, end marker
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(char[] cArr) {
        Iterable<Token> tokens = this.lexer.getTokens(new String(cArr));
        List<String> result = Lists.newArrayList();
        result.add(ITokenizer.SENTENCE_START);
        for (Token token : tokens) {
            if (!isProgramToken(token)) {
                result.add(getTokenString(token));
            }
        }
        result.add(ITokenizer.SENTENCE_END);
        return result;
    }

    /**
     * Reads the whole file and delegates to {@link #tokenListFromCode(char[])}.
     *
     * <p>The file is read with {@code FileUtils.readFileToString(File)}, i.e.
     * without an explicit charset.
     *
     * @param file the file to tokenize
     * @return the token strings of the file content
     * @throws IOException if reading the file fails
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(File file) throws IOException {
        return tokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /**
     * Lexes the code and returns the retained tokens keyed by the position
     * reported by the lexer.
     *
     * <p>{@code SENTENCE_START} is stored under key {@code -1} and
     * {@code SENTENCE_END} under {@code Integer.MAX_VALUE}, so they sort before
     * and after the token entries.
     *
     * @param cArr the code to tokenize
     * @return a new sorted map from token position to token string
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, String> tokenListWithPos(char[] cArr) {
        Iterable<Token> tokens = this.lexer.getTokens(new String(cArr));
        SortedMap<Integer, String> result = Maps.newTreeMap();
        result.put(-1, ITokenizer.SENTENCE_START);
        result.put(Integer.MAX_VALUE, ITokenizer.SENTENCE_END);
        for (Token token : tokens) {
            if (!isProgramToken(token)) {
                result.put(Integer.valueOf(token.getPos()), getTokenString(token));
            }
        }
        return result;
    }
}
