package codemining.java.tokenizers;

import codemining.languagetools.ITokenizer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.eclipse.jdt.core.compiler.InvalidInputException;
import org.eclipse.jdt.internal.core.util.PublicScanner;

/* loaded from: input_file:codemining/java/tokenizers/JavaTokenTypeTokenizer.class */
/**
 * An {@link ITokenizer} that reduces Java source code to a stream of token
 * <em>types</em>: every identifier becomes {@link #IDENTIFIER_TOKEN}, every
 * literal becomes {@link #LITERAL_TOKEN}, comments become one of the
 * {@code COMMENT_*} markers, and every other token is emitted as the text
 * reported by the scanner.
 *
 * <p>Scanning is delegated to a JDT {@link PublicScanner}. When the scanner
 * throws an {@link InvalidInputException} the stack trace is logged as a
 * warning, the offending scan step contributes no token, and scanning goes on.
 */
public class JavaTokenTypeTokenizer implements ITokenizer {
    private static final long serialVersionUID = 7532823864395627836L;
    private static final Logger LOGGER = Logger.getLogger(JavaTokenTypeTokenizer.class.getName());

    public static final String LITERAL_TOKEN = "LITERAL";
    public static final String IDENTIFIER_TOKEN = "IDENTIFIER";
    public static final String COMMENT_JAVADOC = "COMMENT_JAVADOC";
    public static final String COMMENT_LINE = "COMMENT_LINE";
    public static final String COMMENT_BLOCK = "COMMENT_BLOCK";

    // Numeric token codes returned by PublicScanner.getNextToken().
    // NOTE(review): the names below assume these are the compile-time-inlined
    // values of org.eclipse.jdt.core.compiler.ITerminalSymbols (TokenNameEOF,
    // TokenNameIdentifier, the six literal kinds, TokenNameCOMMENT_*) --
    // confirm against the JDT version on the classpath.
    private static final int TOKEN_EOF = 158;
    private static final int TOKEN_IDENTIFIER = 5;
    private static final int TOKEN_FIRST_LITERAL = 40;
    private static final int TOKEN_LAST_LITERAL = 45;
    private static final int TOKEN_COMMENT_LINE = 1001;
    private static final int TOKEN_COMMENT_BLOCK = 1002;
    private static final int TOKEN_COMMENT_JAVADOC = 1003;

    /** Regular expression selecting the files this tokenizer applies to. */
    private static final String JAVA_FILE_REGEX = ".*\\.java$";

    private final RegexFileFilter javaCodeFilter;
    private final boolean tokenizeComments;

    /**
     * Tells whether a scanner token code is treated as a literal.
     *
     * @param token a token code as returned by the scanner
     * @return {@code true} iff {@code token} is one of the codes 40 to 45 (inclusive)
     */
    public static boolean isLiteralToken(int token) {
        return token >= TOKEN_FIRST_LITERAL && token <= TOKEN_LAST_LITERAL;
    }

    /** Creates a tokenizer whose scanner is configured with {@code tokenizeComments = false}. */
    public JavaTokenTypeTokenizer() {
        this(false);
    }

    /**
     * Creates a tokenizer.
     *
     * @param tokenizeComments value copied to the {@code tokenizeComments} flag
     *        of every scanner this tokenizer creates
     */
    public JavaTokenTypeTokenizer(boolean tokenizeComments) {
        this.javaCodeFilter = new RegexFileFilter(JAVA_FILE_REGEX);
        this.tokenizeComments = tokenizeComments;
    }

    /** Builds a fresh scanner carrying this tokenizer's comment setting. */
    private PublicScanner createScanner() {
        final PublicScanner scanner = new PublicScanner();
        scanner.tokenizeComments = this.tokenizeComments;
        return scanner;
    }

    /**
     * Maps a (non-EOF) token code to the string this tokenizer emits for it.
     *
     * @param token the code just returned by {@code scanner.getNextToken()}
     * @param scanner the scanner positioned on that token; consulted only for
     *        tokens that are neither identifiers, literals nor comments
     * @return one of the public marker constants, or the scanner's current token string
     */
    private static String tokenTypeOf(int token, PublicScanner scanner) {
        if (token == TOKEN_IDENTIFIER) {
            return IDENTIFIER_TOKEN;
        }
        if (isLiteralToken(token)) {
            return LITERAL_TOKEN;
        }
        if (token == TOKEN_COMMENT_BLOCK) {
            return COMMENT_BLOCK;
        }
        if (token == TOKEN_COMMENT_JAVADOC) {
            return COMMENT_JAVADOC;
        }
        if (token == TOKEN_COMMENT_LINE) {
            return COMMENT_LINE;
        }
        return scanner.getCurrentTokenString();
    }

    /**
     * Tokenizes {@code code} into full tokens keyed by start position.
     *
     * @param code the source text to scan
     * @return a sorted map from token start position to a {@code FullToken}
     *         built from the token type and an empty second argument; the map
     *         always contains a sentence-start entry at key {@code -1} and a
     *         sentence-end entry at key {@link Integer#MAX_VALUE}
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, ITokenizer.FullToken> fullTokenListWithPos(char[] code) {
        final SortedMap<Integer, ITokenizer.FullToken> tokens = Maps.newTreeMap();
        tokens.put(-1, new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START));
        tokens.put(Integer.MAX_VALUE, new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END));
        final PublicScanner scanner = createScanner();
        scanner.setSource(code);
        while (!scanner.atEnd()) {
            // Everything that depends on the scanned token stays inside the
            // try so a failed scan step never reads an unassigned token.
            try {
                final int token = scanner.getNextToken();
                if (token == TOKEN_EOF) {
                    continue;
                }
                final int position = scanner.getCurrentTokenStartPosition();
                tokens.put(Integer.valueOf(position), new ITokenizer.FullToken(tokenTypeOf(token, scanner), ""));
            } catch (InvalidInputException e) {
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        }
        return tokens;
    }

    /** @return the filter matching file names that end in {@code .java} */
    @Override // codemining.languagetools.ITokenizer
    public AbstractFileFilter getFileFilter() {
        return this.javaCodeFilter;
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws IllegalArgumentException always
     */
    @Override // codemining.languagetools.ITokenizer
    public String getIdentifierType() {
        throw new IllegalArgumentException("Retrieving types is not possible");
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws IllegalArgumentException always
     */
    @Override // codemining.languagetools.ITokenizer
    public Collection<String> getKeywordTypes() {
        throw new IllegalArgumentException("Retrieving types is not possible");
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws IllegalArgumentException always
     */
    @Override // codemining.languagetools.ITokenizer
    public Collection<String> getLiteralTypes() {
        throw new IllegalArgumentException("Retrieving types is not possible");
    }

    /**
     * Wraps a token string into a {@code FullToken}.
     *
     * @param token the token text
     * @return a {@code FullToken} built from {@code token} and an empty second argument
     */
    @Override // codemining.languagetools.ITokenizer
    public ITokenizer.FullToken getTokenFromString(String token) {
        return new ITokenizer.FullToken(token, "");
    }

    /**
     * Tokenizes {@code code} into an ordered list of full tokens.
     *
     * @param code the source text to scan
     * @return the token types in scan order, preceded by a sentence-start
     *         token and followed by a sentence-end token
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(char[] code) {
        final List<ITokenizer.FullToken> tokens = Lists.newArrayList();
        tokens.add(new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START));
        final PublicScanner scanner = createScanner();
        scanner.setSource(code);
        do {
            try {
                final int token = scanner.getNextToken();
                if (token == TOKEN_EOF) {
                    break;
                }
                tokens.add(new ITokenizer.FullToken(tokenTypeOf(token, scanner), ""));
            } catch (InvalidInputException e) {
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        } while (!scanner.atEnd());
        tokens.add(new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END));
        return tokens;
    }

    /**
     * Reads {@code file} and tokenizes its content.
     *
     * @param file the file to read
     * @return the result of {@link #getTokenListFromCode(char[])} on the file's text
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(File file) throws IOException {
        return getTokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /**
     * Tokenizes {@code code} into an ordered list of token-type strings.
     *
     * @param code the source text to scan
     * @return the token types in scan order, preceded by the sentence-start
     *         marker and followed by the sentence-end marker
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(char[] code) {
        final List<String> tokens = Lists.newArrayList();
        tokens.add(ITokenizer.SENTENCE_START);
        final PublicScanner scanner = createScanner();
        scanner.setSource(code);
        do {
            try {
                final int token = scanner.getNextToken();
                if (token == TOKEN_EOF) {
                    break;
                }
                tokens.add(tokenTypeOf(token, scanner));
            } catch (InvalidInputException e) {
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        } while (!scanner.atEnd());
        tokens.add(ITokenizer.SENTENCE_END);
        return tokens;
    }

    /**
     * Reads {@code file} and tokenizes its content.
     *
     * @param file the file to read
     * @return the result of {@link #tokenListFromCode(char[])} on the file's text
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(File file) throws IOException {
        return tokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /**
     * Tokenizes {@code code} into token-type strings keyed by start position.
     *
     * @param code the source text to scan
     * @return a sorted map from token start position to token type; the map
     *         always contains the sentence-start marker at key {@code -1} and
     *         the sentence-end marker at key {@link Integer#MAX_VALUE}
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, String> tokenListWithPos(char[] code) {
        final SortedMap<Integer, String> tokens = Maps.newTreeMap();
        tokens.put(-1, ITokenizer.SENTENCE_START);
        tokens.put(Integer.MAX_VALUE, ITokenizer.SENTENCE_END);
        final PublicScanner scanner = createScanner();
        scanner.setSource(code);
        while (!scanner.atEnd()) {
            // Token handling lives inside the try: after a scan failure there
            // is no valid token or position to record.
            try {
                final int token = scanner.getNextToken();
                if (token == TOKEN_EOF) {
                    continue;
                }
                final int position = scanner.getCurrentTokenStartPosition();
                tokens.put(Integer.valueOf(position), tokenTypeOf(token, scanner));
            } catch (InvalidInputException e) {
                LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
            }
        }
        return tokens;
    }

    /**
     * Reads {@code file} and returns its full tokens keyed by start position.
     *
     * @param file the file to read
     * @return the result of {@link #fullTokenListWithPos(char[])} on the file's text
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, ITokenizer.FullToken> tokenListWithPos(File file) throws IOException {
        return fullTokenListWithPos(FileUtils.readFileToString(file).toCharArray());
    }
}
