package codemining.python.codeutils;

import codemining.languagetools.ITokenizer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.lang.NotImplementedException;
import org.python.pydev.parser.grammarcommon.ITokenManager;
import org.python.pydev.parser.jython.FastCharStream;
import org.python.pydev.parser.jython.Token;

/* loaded from: input_file:codemining/python/codeutils/AbstractPythonTokenizer.class */
/**
 * Base class for tokenizers of Python source code.
 *
 * <p>Subclasses provide the concrete token manager through
 * {@link #getPythonTokenizer(FastCharStream)}; this class drains that token
 * manager and exposes the result in the shapes required by {@link ITokenizer}.
 * Tokens rejected by {@link #shouldAdd(Token)} are left out of every list and
 * map produced here.
 */
public abstract class AbstractPythonTokenizer implements ITokenizer {
    private static final long serialVersionUID = 5009530263783901964L;

    /** Accepts files whose name ends in {@code .py}. */
    private static final RegexFileFilter PYTHON_CODE_FILTER = new RegexFileFilter(".*\\.py$");

    /**
     * Token kind that terminates every tokenization loop in this class.
     * NOTE(review): 0 is the EOF kind in JavaCC-generated grammars; presumably
     * that is what the pydev token managers emit at end of input.
     */
    private static final int END_OF_INPUT_KIND = 0;

    /**
     * Factor applied to the line number when building a position key
     * ({@code line * 500 + column}). A token starting at column 500 or beyond
     * gets a key that can coincide with a token on a later line, in which case
     * the later {@code put} replaces the earlier entry.
     */
    private static final int POSITION_LINE_MULTIPLIER = 500;

    /**
     * Encoding used to decode source files. Fixed explicitly so that results do
     * not vary with the JVM's default charset.
     */
    private static final String SOURCE_ENCODING = "UTF-8";

    /**
     * Tokenizes the given code and maps each retained token to its position key.
     *
     * @param cArr the source code to tokenize
     * @return the retained tokens (text and kind) sorted by position key
     */
    @Override
    public SortedMap<Integer, ITokenizer.FullToken> fullTokenListWithPos(char[] cArr) {
        final ITokenManager tokenManager = getPythonTokenizer(new FastCharStream(cArr));
        final SortedMap<Integer, ITokenizer.FullToken> tokensByPosition = Maps.newTreeMap();
        for (Token current = tokenManager.getNextToken();
                current.kind != END_OF_INPUT_KIND;
                current = tokenManager.getNextToken()) {
            if (shouldAdd(current)) {
                tokensByPosition.put(positionKey(current), toFullToken(current));
            }
        }
        return tokensByPosition;
    }

    /**
     * @return a filter matching file names that end in {@code .py}
     */
    @Override
    public AbstractFileFilter getFileFilter() {
        return PYTHON_CODE_FILTER;
    }

    /**
     * @return the token kind, rendered as a string, that this tokenizer reports
     *         for identifiers. NOTE(review): 94 is presumably the identifier
     *         kind of the pydev grammar constants; verify against the grammar.
     */
    @Override
    public String getIdentifierType() {
        return "94";
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws NotImplementedException always
     */
    @Override
    public Collection<String> getKeywordTypes() {
        throw new NotImplementedException();
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws NotImplementedException always
     */
    @Override
    public Collection<String> getLiteralTypes() {
        throw new NotImplementedException();
    }

    /**
     * Creates the token manager that reads tokens from the given stream.
     *
     * @param fastCharStream the character stream wrapping the code to tokenize
     * @return a token manager positioned at the start of the stream
     */
    public abstract ITokenManager getPythonTokenizer(FastCharStream fastCharStream);

    /**
     * Tokenizes the given string and returns only its first token. The token is
     * returned as-is: it is neither checked against {@link #shouldAdd(Token)}
     * nor against the end-of-input kind.
     *
     * @param str the text to tokenize
     * @return the first token produced for {@code str}
     */
    @Override
    public ITokenizer.FullToken getTokenFromString(String str) {
        final ITokenManager tokenManager = getPythonTokenizer(new FastCharStream(str.toCharArray()));
        return toFullToken(tokenManager.getNextToken());
    }

    /**
     * Tokenizes the given code into a list of retained tokens, in stream order.
     *
     * @param cArr the source code to tokenize
     * @return the retained tokens (text and kind)
     */
    @Override
    public List<ITokenizer.FullToken> getTokenListFromCode(char[] cArr) {
        final ITokenManager tokenManager = getPythonTokenizer(new FastCharStream(cArr));
        final List<ITokenizer.FullToken> tokens = Lists.newArrayList();
        for (Token current = tokenManager.getNextToken();
                current.kind != END_OF_INPUT_KIND;
                current = tokenManager.getNextToken()) {
            if (shouldAdd(current)) {
                tokens.add(toFullToken(current));
            }
        }
        return tokens;
    }

    /**
     * Reads the file as UTF-8 and tokenizes it with
     * {@link #getTokenListFromCode(char[])}.
     *
     * @param file the source file to tokenize
     * @return the retained tokens (text and kind)
     * @throws IOException if the file cannot be read
     */
    @Override
    public List<ITokenizer.FullToken> getTokenListFromCode(File file) throws IOException {
        return getTokenListFromCode(readSource(file));
    }

    /**
     * Decides whether a token is kept in the produced lists and maps.
     *
     * @param token the token to test
     * @return {@code false} for token kinds 6, 13, 14 and 115, {@code true}
     *         otherwise. NOTE(review): the meaning of these kinds comes from the
     *         pydev grammar constants, which are not visible here.
     */
    public boolean shouldAdd(Token token) {
        switch (token.kind) {
            case 6:
            case 13:
            case 14:
            case 115:
                return false;
            default:
                return true;
        }
    }

    /**
     * Tokenizes the given code into the text of each retained token, in stream
     * order.
     *
     * @param cArr the source code to tokenize
     * @return the text of the retained tokens
     */
    @Override
    public List<String> tokenListFromCode(char[] cArr) {
        final ITokenManager tokenManager = getPythonTokenizer(new FastCharStream(cArr));
        final List<String> images = Lists.newArrayList();
        for (Token current = tokenManager.getNextToken();
                current.kind != END_OF_INPUT_KIND;
                current = tokenManager.getNextToken()) {
            if (shouldAdd(current)) {
                images.add(current.image);
            }
        }
        return images;
    }

    /**
     * Reads the file as UTF-8 and tokenizes it with
     * {@link #tokenListFromCode(char[])}.
     *
     * @param file the source file to tokenize
     * @return the text of the retained tokens
     * @throws IOException if the file cannot be read
     */
    @Override
    public List<String> tokenListFromCode(File file) throws IOException {
        return tokenListFromCode(readSource(file));
    }

    /**
     * Tokenizes the given code and maps the text of each retained token to its
     * position key.
     *
     * @param cArr the source code to tokenize
     * @return the text of the retained tokens sorted by position key
     */
    @Override
    public SortedMap<Integer, String> tokenListWithPos(char[] cArr) {
        final ITokenManager tokenManager = getPythonTokenizer(new FastCharStream(cArr));
        final SortedMap<Integer, String> imagesByPosition = Maps.newTreeMap();
        for (Token current = tokenManager.getNextToken();
                current.kind != END_OF_INPUT_KIND;
                current = tokenManager.getNextToken()) {
            if (shouldAdd(current)) {
                imagesByPosition.put(positionKey(current), current.image);
            }
        }
        return imagesByPosition;
    }

    /**
     * Reads the file as UTF-8 and tokenizes it with
     * {@link #fullTokenListWithPos(char[])}.
     *
     * @param file the source file to tokenize
     * @return the retained tokens (text and kind) sorted by position key
     * @throws IOException if the file cannot be read
     */
    @Override
    public SortedMap<Integer, ITokenizer.FullToken> tokenListWithPos(File file) throws IOException {
        return fullTokenListWithPos(readSource(file));
    }

    /** Computes the map key for a token: {@code beginLine * 500 + beginCol}. */
    private static Integer positionKey(Token token) {
        return Integer.valueOf((token.getBeginLine() * POSITION_LINE_MULTIPLIER) + token.getBeginCol());
    }

    /** Wraps a token's text and its kind (rendered as a decimal string). */
    private static ITokenizer.FullToken toFullToken(Token token) {
        return new ITokenizer.FullToken(token.image, Integer.toString(token.kind));
    }

    /** Reads the whole file, decoded as UTF-8, into a character array. */
    private static char[] readSource(File file) throws IOException {
        return FileUtils.readFileToString(file, SOURCE_ENCODING).toCharArray();
    }
}
