package codemining.java.tokenizers;

import codemining.languagetools.ITokenizer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.eclipse.core.runtime.internal.adaptor.EclipseCommandProvider;
import org.eclipse.jdt.core.compiler.InvalidInputException;
import org.eclipse.jdt.internal.core.util.PublicScanner;

/* loaded from: input_file:codemining/java/tokenizers/JavaTokenizer.class */
/**
 * An {@link ITokenizer} for Java source that delegates the lexing to a JDT
 * {@link PublicScanner}.
 *
 * <p>Token types are reported as the decimal string form of the integer id
 * returned by {@link PublicScanner#getNextToken()}. The numeric ids listed in
 * the constants below are presumably inlined JDT {@code ITerminalSymbols}
 * values -- TODO confirm against the JDT version on the classpath.
 */
public class JavaTokenizer implements ITokenizer {
    private static final long serialVersionUID = 505587999946057082L;

    /**
     * Token id on which every scanning loop stops producing tokens.
     * NOTE(review): presumably {@code ITerminalSymbols.TokenNameEOF}; verify.
     */
    private static final int EOF_TOKEN_ID = 158;

    /** Copied onto every scanner created by {@link #prepareScanner()}. */
    private final boolean tokenizeComments;

    private static final Logger LOGGER = Logger.getLogger(JavaTokenizer.class.getName());

    /** File filter accepting names that end in {@code .java}. */
    public static final RegexFileFilter javaCodeFileFilter = new RegexFileFilter(".*\\.java$");

    /** Token type reported by {@link #getIdentifierType()}. */
    public static final String IDENTIFIER_ID = Integer.toString(5);

    /** Token types reported by {@link #getKeywordTypes()}. */
    public static final String[] KEYWORD_TYPE_IDs = {Integer.toString(18), Integer.toString(19), Integer.toString(20), Integer.toString(21), Integer.toString(22), Integer.toString(23), Integer.toString(24), Integer.toString(25), Integer.toString(26)};

    /** String-like literal token types; part of {@link #getLiteralTypes()}. */
    public static final String[] STRING_LITERAL_IDs = {Integer.toString(45), Integer.toString(44)};

    /** Numeric literal token types; part of {@link #getLiteralTypes()}. */
    public static final String[] NUMBER_LITERAL_IDs = {Integer.toString(43), Integer.toString(42), Integer.toString(40), Integer.toString(41)};

    /** Comment token types (not referenced inside this class). */
    public static final String[] COMMENT_IDs = {Integer.toString(1002), Integer.toString(1003), Integer.toString(1001)};

    /** Operator token types (not referenced inside this class). */
    public static final String[] OPERATOR_IDs = {Integer.toString(62), Integer.toString(79), Integer.toString(172), Integer.toString(154), Integer.toString(90), Integer.toString(10), Integer.toString(171), Integer.toString(6), Integer.toString(402), Integer.toString(167), Integer.toString(35), Integer.toString(68), Integer.toString(67), Integer.toString(15), Integer.toString(14), Integer.toString(176), Integer.toString(69), Integer.toString(66), Integer.toString(7), Integer.toString(4), Integer.toString(169), Integer.toString(2), Integer.toString(8), Integer.toString(170), Integer.toString(71), Integer.toString(36), Integer.toString(70), Integer.toString(173), Integer.toString(80), Integer.toString(3), Integer.toString(168), Integer.toString(1), Integer.toString(81), Integer.toString(166), Integer.toString(9), Integer.toString(175), Integer.toString(11), Integer.toString(177), Integer.toString(86), Integer.toString(64), Integer.toString(72), Integer.toString(12), Integer.toString(178), Integer.toString(63), Integer.toString(174)};

    /** Brace token types (not referenced inside this class). */
    public static final String[] BRACE_IDs = {Integer.toString(110), Integer.toString(95)};

    /** Syntax/punctuation token types (not referenced inside this class). */
    public static final String[] SYNTAX_IDs = {Integer.toString(90), Integer.toString(6), Integer.toString(402), Integer.toString(64), Integer.toString(110), Integer.toString(95), Integer.toString(7), Integer.toString(86), Integer.toString(15), Integer.toString(166)};

    /** Creates a tokenizer whose scanners have {@code tokenizeComments} set to {@code false}. */
    public JavaTokenizer() {
        this.tokenizeComments = false;
    }

    /**
     * Creates a tokenizer.
     *
     * @param z value assigned to the {@code tokenizeComments} flag of every
     *          scanner this tokenizer creates
     */
    public JavaTokenizer(boolean z) {
        this.tokenizeComments = z;
    }

    /**
     * Tokenizes the given code into a map keyed by each token's start position.
     *
     * <p>The map always contains {@link ITokenizer#SENTENCE_START} at key
     * {@code -1} and {@link ITokenizer#SENTENCE_END} at key
     * {@link Integer#MAX_VALUE}. Unlike {@link #tokenListWithPos(char[])},
     * token text is not passed through {@link #stripTokenIfNeeded(String)}.
     *
     * @param cArr the source code to scan
     * @return position-sorted tokens together with their type ids
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, ITokenizer.FullToken> fullTokenListWithPos(char[] cArr) {
        final PublicScanner scanner = prepareScanner();
        final SortedMap<Integer, ITokenizer.FullToken> tokens = Maps.newTreeMap();
        tokens.put(-1, new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START));
        tokens.put(Integer.MAX_VALUE, new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END));
        scanner.setSource(cArr);
        while (!scanner.atEnd()) {
            try {
                final int tokenId = scanner.getNextToken();
                if (tokenId == EOF_TOKEN_ID) {
                    // Nothing to record; the loop condition decides whether to stop.
                    continue;
                }
                final Integer position = Integer.valueOf(scanner.getCurrentTokenStartPosition());
                final String text = transformToken(tokenId, scanner.getCurrentTokenString());
                tokens.put(position, new ITokenizer.FullToken(text, Integer.toString(tokenId)));
            } catch (InvalidInputException e) {
                // Best effort: log the scanner failure and carry on with the next token.
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        }
        return tokens;
    }

    /** @return {@link #javaCodeFileFilter} */
    @Override // codemining.languagetools.ITokenizer
    public AbstractFileFilter getFileFilter() {
        return javaCodeFileFilter;
    }

    /** @return {@link #IDENTIFIER_ID} */
    @Override // codemining.languagetools.ITokenizer
    public String getIdentifierType() {
        return IDENTIFIER_ID;
    }

    /** @return a fixed-size list view over {@link #KEYWORD_TYPE_IDs} */
    @Override // codemining.languagetools.ITokenizer
    public Collection<String> getKeywordTypes() {
        return Arrays.asList(KEYWORD_TYPE_IDs);
    }

    /**
     * @return a new list holding {@link #NUMBER_LITERAL_IDs} followed by
     *         {@link #STRING_LITERAL_IDs}
     */
    @Override // codemining.languagetools.ITokenizer
    public Collection<String> getLiteralTypes() {
        final List<String> literalTypes = Lists.newArrayList(Arrays.asList(NUMBER_LITERAL_IDs));
        literalTypes.addAll(Arrays.asList(STRING_LITERAL_IDs));
        return literalTypes;
    }

    /**
     * Converts a single token string into a {@link ITokenizer.FullToken}.
     *
     * <p>The two sentence markers map to themselves. Any other string is
     * tokenized with {@link #getTokenListFromCode(char[])} and the element at
     * index 1 (the entry right after the start marker) is returned; that is
     * the end marker when the scanner produced no tokens.
     *
     * @param str the token text
     * @return the corresponding full token
     */
    @Override // codemining.languagetools.ITokenizer
    public ITokenizer.FullToken getTokenFromString(String str) {
        if (str.equals(ITokenizer.SENTENCE_START)) {
            return new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START);
        }
        if (str.equals(ITokenizer.SENTENCE_END)) {
            return new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END);
        }
        return getTokenListFromCode(str.toCharArray()).get(1);
    }

    /**
     * Tokenizes the given code into an ordered list of full tokens, wrapped in
     * {@link ITokenizer#SENTENCE_START} and {@link ITokenizer#SENTENCE_END}.
     * Token text is passed through {@link #transformToken(int, String)} and
     * then {@link #stripTokenIfNeeded(String)}.
     *
     * @param cArr the source code to scan
     * @return the tokens in scan order
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(char[] cArr) {
        final List<ITokenizer.FullToken> tokens = Lists.newArrayList();
        tokens.add(new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START));
        final PublicScanner scanner = prepareScanner();
        scanner.setSource(cArr);
        do {
            try {
                final int tokenId = scanner.getNextToken();
                if (tokenId == EOF_TOKEN_ID) {
                    break;
                }
                final String text = stripTokenIfNeeded(transformToken(tokenId, scanner.getCurrentTokenString()));
                tokens.add(new ITokenizer.FullToken(text, Integer.toString(tokenId)));
            } catch (StringIndexOutOfBoundsException e) {
                // Best effort: log the scanner failure and carry on with the next token.
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            } catch (InvalidInputException e2) {
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e2));
            }
        } while (!scanner.atEnd());
        tokens.add(new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END));
        return tokens;
    }

    /**
     * Reads the file with {@link FileUtils#readFileToString(File)} and
     * tokenizes its content with {@link #getTokenListFromCode(char[])}.
     *
     * @param file the file to tokenize
     * @return the tokens in scan order
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(File file) throws IOException {
        return getTokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates a fresh scanner configured with this tokenizer's
     * {@code tokenizeComments} flag.
     *
     * @return a new scanner with no source set yet
     */
    public PublicScanner prepareScanner() {
        final PublicScanner scanner = new PublicScanner();
        scanner.tokenizeComments = this.tokenizeComments;
        return scanner;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Normalizes token text: every newline, tab and carriage return becomes a
     * space, the four-character sequence {@code '\\'} becomes {@code '|'}, and
     * every remaining backslash becomes {@code |}.
     *
     * @param str the raw token text
     * @return the normalized token text
     */
    public String stripTokenIfNeeded(String str) {
        // The char-based replaces remove every '\n', '\t' and '\r', so the
        // String-based replaces of the same characters that used to follow
        // (spelled via IOUtils.LINE_SEPARATOR_UNIX and the internal Eclipse
        // constant EclipseCommandProvider.TAB, assumed to be "\t") could never
        // match and are omitted.
        final String whitespaceFlattened = str.replace('\n', ' ').replace('\t', ' ').replace('\r', ' ');
        return whitespaceFlattened.replace("'\\\\'", "'|'").replace("\\", "|");
    }

    /**
     * Tokenizes the given code into an ordered list of token strings, wrapped
     * in {@link ITokenizer#SENTENCE_START} and {@link ITokenizer#SENTENCE_END}.
     * Token text is passed through {@link #transformToken(int, String)} and
     * then {@link #stripTokenIfNeeded(String)}.
     *
     * @param cArr the source code to scan
     * @return the token strings in scan order
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(char[] cArr) {
        final PublicScanner scanner = prepareScanner();
        final List<String> tokens = Lists.newArrayList();
        tokens.add(ITokenizer.SENTENCE_START);
        scanner.setSource(cArr);
        do {
            try {
                final int tokenId = scanner.getNextToken();
                if (tokenId == EOF_TOKEN_ID) {
                    break;
                }
                tokens.add(stripTokenIfNeeded(transformToken(tokenId, scanner.getCurrentTokenString())));
            } catch (StringIndexOutOfBoundsException e) {
                // Best effort: log the scanner failure and carry on with the next token.
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            } catch (InvalidInputException e2) {
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e2));
            }
        } while (!scanner.atEnd());
        tokens.add(ITokenizer.SENTENCE_END);
        return tokens;
    }

    /**
     * Reads the file with {@link FileUtils#readFileToString(File)} and
     * tokenizes its content with {@link #tokenListFromCode(char[])}.
     *
     * @param file the file to tokenize
     * @return the token strings in scan order
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(File file) throws IOException {
        return tokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /**
     * Tokenizes the given code into a map from each token's start position to
     * its text.
     *
     * <p>The map always contains {@link ITokenizer#SENTENCE_START} at key
     * {@code -1} and {@link ITokenizer#SENTENCE_END} at key
     * {@link Integer#MAX_VALUE}. Token text is passed through
     * {@link #transformToken(int, String)} and then
     * {@link #stripTokenIfNeeded(String)}.
     *
     * @param cArr the source code to scan
     * @return position-sorted token strings
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, String> tokenListWithPos(char[] cArr) {
        final PublicScanner scanner = prepareScanner();
        final SortedMap<Integer, String> tokens = Maps.newTreeMap();
        tokens.put(-1, ITokenizer.SENTENCE_START);
        tokens.put(Integer.MAX_VALUE, ITokenizer.SENTENCE_END);
        scanner.setSource(cArr);
        while (!scanner.atEnd()) {
            try {
                final int tokenId = scanner.getNextToken();
                if (tokenId == EOF_TOKEN_ID) {
                    // Nothing to record; the loop condition decides whether to stop.
                    continue;
                }
                final Integer position = Integer.valueOf(scanner.getCurrentTokenStartPosition());
                tokens.put(position, stripTokenIfNeeded(transformToken(tokenId, scanner.getCurrentTokenString())));
            } catch (InvalidInputException e) {
                // Best effort: log the scanner failure and carry on with the next token.
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        }
        return tokens;
    }

    /**
     * Reads the file with {@link FileUtils#readFileToString(File)} and
     * tokenizes its content with {@link #fullTokenListWithPos(char[])}.
     *
     * @param file the file to tokenize
     * @return position-sorted tokens together with their type ids
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, ITokenizer.FullToken> tokenListWithPos(File file) throws IOException {
        return fullTokenListWithPos(FileUtils.readFileToString(file).toCharArray());
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Hook applied to every scanned token before it is recorded. This
     * implementation returns the text unchanged.
     *
     * @param i   the token id returned by the scanner
     * @param str the token text returned by the scanner
     * @return the text to record for this token
     */
    public String transformToken(int i, String str) {
        return str;
    }
}
