package codemining.java.codeutils;

import codemining.languagetools.ITokenizer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.logging.Logger;
import org.apache.commons.collections.ListUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.eclipse.jdt.core.compiler.InvalidInputException;
import org.eclipse.jdt.internal.core.util.PublicScanner;

/* loaded from: input_file:codemining/java/codeutils/JavaWhitespaceTokenizer.class */
/**
 * Tokenizer for Java source that abstracts away concrete identifiers, literals and comments and
 * makes whitespace explicit.
 *
 * <p>Identifiers and literals are replaced by the generic symbols defined in
 * {@code JavaTokenTypeTokenizer}, comments by the {@code COMMENT_*} markers declared here, and
 * whitespace by {@code WS_*} symbols that encode space/tab counts or the change in indentation
 * after a line break. All other tokens are emitted verbatim.
 *
 * <p>Every public method delegates to a freshly created {@link TokenizerImplementation}, because
 * that class keeps per-run indentation counters.
 */
public class JavaWhitespaceTokenizer implements ITokenizer {
    /** Symbol emitted in place of a Javadoc comment. */
    public static final String COMMENT_JAVADOC = "COMMENT_JAVADOC";
    /** Symbol emitted in place of a line ({@code //}) comment. */
    public static final String COMMENT_LINE = "COMMENT_LINE";
    /** Symbol emitted in place of a block comment. */
    public static final String COMMENT_BLOCK = "COMMENT_BLOCK";
    private static final long serialVersionUID = -3956186603216801513L;
    private static final Logger LOGGER = Logger.getLogger(JavaWhitespaceTokenizer.class.getName());

    /** A token symbol paired with the column offset at which the scanner reported it. */
    public static class AnnotatedToken {
        /** The converted token symbol. */
        public final String token;
        /** Offset of the token start from the start of its line (0 for the sentence markers). */
        public final int width;

        /**
         * @param token the converted token symbol
         * @param width the offset of the token from the start of its line
         */
        public AnnotatedToken(final String token, final int width) {
            this.token = token;
            this.width = width;
        }
    }

    /**
     * Stateful worker that performs the actual tokenization. It tracks the indentation of the
     * most recently seen line, so one instance must not be shared between tokenization runs that
     * are in progress at the same time.
     */
    private static class TokenizerImplementation implements ITokenizer {
        private static final long serialVersionUID = 3466332155585174404L;

        // Token kinds returned by PublicScanner.getNextToken(), with the numeric values that were
        // inlined into the compiled class. NOTE(review): presumably these are the
        // org.eclipse.jdt.core.compiler.ITerminalSymbols constants of the JDT version this was
        // built against -- confirm before upgrading JDT, as those values differ between releases.
        private static final int TOKEN_IDENTIFIER = 5;
        private static final int TOKEN_EOF = 158;
        private static final int TOKEN_WHITESPACE = 1000;
        private static final int TOKEN_COMMENT_LINE = 1001;
        private static final int TOKEN_COMMENT_BLOCK = 1002;
        private static final int TOKEN_COMMENT_JAVADOC = 1003;

        private final RegexFileFilter javaCodeFilter;
        /** Number of spaces counted on the most recent single-line-break whitespace token. */
        private int currentIdentationSpaces;
        /** Number of tabs counted on the most recent single-line-break whitespace token. */
        private int currentIdentationTabs;

        private TokenizerImplementation() {
            this.javaCodeFilter = new RegexFileFilter(".*\\.java$");
            this.currentIdentationSpaces = 0;
            this.currentIdentationTabs = 0;
        }

        /**
         * Same as {@link #tokenListWithPos(char[])}, with every symbol wrapped in a
         * {@code FullToken} whose token type is the empty string.
         *
         * @param code the source code to tokenize
         * @return the symbols keyed by position
         */
        @Override // codemining.languagetools.ITokenizer
        public SortedMap<Integer, ITokenizer.FullToken> fullTokenListWithPos(final char[] code) {
            final SortedMap<Integer, ITokenizer.FullToken> fullTokens = Maps.newTreeMap();
            for (final Map.Entry<Integer, String> entry : tokenListWithPos(code).entrySet()) {
                fullTokens.put(entry.getKey(), new ITokenizer.FullToken(entry.getValue(), ""));
            }
            return fullTokens;
        }

        /**
         * Converts the scanner's current token into zero or more output symbols.
         *
         * <p>For a line comment this method reads one further token from the scanner: the
         * comment is followed by a line-break whitespace symbol, which is merged with the next
         * token if that is whitespace; otherwise the next token is converted and appended too.
         *
         * @param scanner the scanner positioned on the token to convert
         * @param token the token kind returned by the scanner for the current token
         * @return the converted symbols; empty for the end-of-file token
         * @throws InvalidInputException if reading the look-ahead token after a line comment fails
         */
        private List<String> getConvertedToken(final PublicScanner scanner, final int token)
                throws InvalidInputException {
            if (token == TOKEN_EOF) {
                return java.util.Collections.<String>emptyList();
            }
            final List<String> converted = Lists.newArrayList();
            final String tokenText = scanner.getCurrentTokenString();
            if (token == TOKEN_WHITESPACE) {
                converted.addAll(toWhiteSpaceSymbol(tokenText));
            } else if (token == TOKEN_IDENTIFIER) {
                converted.add(toIdentifierSymbol(tokenText));
            } else if (JavaTokenTypeTokenizer.isLiteralToken(token)) {
                converted.add(toLiteralSymbol(tokenText));
            } else if (token == TOKEN_COMMENT_BLOCK) {
                converted.add(JavaWhitespaceTokenizer.COMMENT_BLOCK);
            } else if (token == TOKEN_COMMENT_LINE) {
                converted.add(JavaWhitespaceTokenizer.COMMENT_LINE);
                final int lookahead = scanner.getNextToken();
                if (lookahead == TOKEN_WHITESPACE) {
                    // Fold the explicit line break into the whitespace that follows the comment.
                    converted.addAll(toWhiteSpaceSymbol(
                            IOUtils.LINE_SEPARATOR_UNIX + scanner.getCurrentTokenString()));
                } else {
                    converted.addAll(toWhiteSpaceSymbol(IOUtils.LINE_SEPARATOR_UNIX));
                    converted.addAll(getConvertedToken(scanner, lookahead));
                }
            } else if (token == TOKEN_COMMENT_JAVADOC) {
                converted.add(JavaWhitespaceTokenizer.COMMENT_JAVADOC);
            } else {
                converted.add(tokenText);
            }
            return converted;
        }

        /** @return a filter accepting file names that end in {@code .java} */
        @Override // codemining.languagetools.ITokenizer
        public AbstractFileFilter getFileFilter() {
            return this.javaCodeFilter;
        }

        /** @return the empty string */
        @Override // codemining.languagetools.ITokenizer
        public String getIdentifierType() {
            return "";
        }

        /**
         * @param token the token text
         * @return a {@code FullToken} carrying {@code token} with an empty token type
         */
        @Override // codemining.languagetools.ITokenizer
        public ITokenizer.FullToken getTokenFromString(final String token) {
            return new ITokenizer.FullToken(token, "");
        }

        /**
         * Tokenizes {@code code} into {@code FullToken}s, bracketed by the sentence start and end
         * markers. Converted symbols carry an empty token type.
         *
         * @param code the source code to tokenize
         * @return the token sequence
         */
        @Override // codemining.languagetools.ITokenizer
        public List<ITokenizer.FullToken> getTokenListFromCode(final char[] code) {
            final List<ITokenizer.FullToken> tokens = Lists.newArrayList();
            tokens.add(new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START));
            final PublicScanner scanner = prepareScanner(code);
            do {
                // Scanning and conversion both throw InvalidInputException, so the whole step
                // lives in the try; a failed step is logged and scanning resumes.
                try {
                    final int token = scanner.getNextToken();
                    if (token == TOKEN_EOF) {
                        break;
                    }
                    for (final String symbol : getConvertedToken(scanner, token)) {
                        tokens.add(new ITokenizer.FullToken(symbol, ""));
                    }
                } catch (final InvalidInputException e) {
                    JavaWhitespaceTokenizer.LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
                }
            } while (!scanner.atEnd());
            tokens.add(new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END));
            return tokens;
        }

        /**
         * Tokenizes {@code code} and annotates each symbol with the scanner's current token
         * start position expressed as an offset from the start of its line.
         *
         * @param code the source code to tokenize
         * @return the annotated symbols, bracketed by sentence start/end markers of width 0
         */
        public List<AnnotatedToken> getTokensWithWidthData(final char[] code) {
            final List<AnnotatedToken> tokens = Lists.newArrayList();
            tokens.add(new AnnotatedToken(ITokenizer.SENTENCE_START, 0));
            final PublicScanner scanner = prepareScanner(code);
            do {
                try {
                    final int token = scanner.getNextToken();
                    if (token == TOKEN_EOF) {
                        break;
                    }
                    final List<String> symbols = getConvertedToken(scanner, token);
                    // Read the position after conversion: every symbol from one scanner step
                    // shares it. NOTE(review): for line comments the conversion has already
                    // advanced the scanner, so this is the look-ahead token's position.
                    final int position = scanner.getCurrentTokenStartPosition();
                    final int column =
                            position - scanner.getLineStart(scanner.getLineNumber(position));
                    for (final String symbol : symbols) {
                        tokens.add(new AnnotatedToken(symbol, column));
                    }
                } catch (final InvalidInputException e) {
                    JavaWhitespaceTokenizer.LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
                }
            } while (!scanner.atEnd());
            tokens.add(new AnnotatedToken(ITokenizer.SENTENCE_END, 0));
            return tokens;
        }

        /**
         * Creates a scanner over {@code code} that reports whitespace and comments as tokens and
         * records line separators, and resets the indentation counters for the new run.
         *
         * @param code the source code to scan
         * @return the configured scanner
         */
        private PublicScanner prepareScanner(final char[] code) {
            final PublicScanner scanner = new PublicScanner();
            scanner.setSource(code);
            scanner.tokenizeWhiteSpace = true;
            scanner.recordLineSeparator = true;
            scanner.tokenizeComments = true;
            this.currentIdentationSpaces = 0;
            this.currentIdentationTabs = 0;
            return scanner;
        }

        /**
         * @param token the identifier text (ignored)
         * @return the generic identifier symbol
         */
        public String toIdentifierSymbol(final String token) {
            return JavaTokenTypeTokenizer.IDENTIFIER_TOKEN;
        }

        /**
         * Tokenizes {@code code} into a flat list of symbols, bracketed by the sentence start and
         * end markers.
         *
         * @param code the source code to tokenize
         * @return the symbol sequence
         */
        @Override // codemining.languagetools.ITokenizer
        public List<String> tokenListFromCode(final char[] code) {
            final List<String> tokens = Lists.newArrayList();
            tokens.add(ITokenizer.SENTENCE_START);
            final PublicScanner scanner = prepareScanner(code);
            do {
                try {
                    final int token = scanner.getNextToken();
                    if (token == TOKEN_EOF) {
                        break;
                    }
                    tokens.addAll(getConvertedToken(scanner, token));
                } catch (final InvalidInputException e) {
                    JavaWhitespaceTokenizer.LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
                }
            } while (!scanner.atEnd());
            tokens.add(ITokenizer.SENTENCE_END);
            return tokens;
        }

        /**
         * Tokenizes {@code code} into symbols keyed by position. The sentence start marker is
         * stored at key -1 and the end marker at {@code Integer.MAX_VALUE}. When one scanner
         * token converts to several symbols, they are stored at consecutive keys starting at the
         * token's start position.
         *
         * @param code the source code to tokenize
         * @return the symbols keyed by position
         */
        @Override // codemining.languagetools.ITokenizer
        public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
            final SortedMap<Integer, String> tokens = Maps.newTreeMap();
            tokens.put(-1, ITokenizer.SENTENCE_START);
            tokens.put(Integer.MAX_VALUE, ITokenizer.SENTENCE_END);
            final PublicScanner scanner = prepareScanner(code);
            // Unlike the list-based methods, the scanner is not touched when the input is
            // already exhausted.
            while (!scanner.atEnd()) {
                try {
                    final int token = scanner.getNextToken();
                    if (token == TOKEN_EOF) {
                        break;
                    }
                    // Capture the position first: converting a line comment advances the scanner.
                    final int position = scanner.getCurrentTokenStartPosition();
                    int offset = 0;
                    for (final String symbol : getConvertedToken(scanner, token)) {
                        tokens.put(position + offset, symbol);
                        offset++;
                    }
                } catch (final InvalidInputException e) {
                    JavaWhitespaceTokenizer.LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
                }
            }
            return tokens;
        }

        /**
         * @param token the literal text (ignored)
         * @return the generic literal symbol
         */
        public String toLiteralSymbol(final String token) {
            return JavaTokenTypeTokenizer.LITERAL_TOKEN;
        }

        /**
         * Converts a whitespace string into one or more {@code WS_*} symbols. Carriage returns
         * are discarded before counting.
         *
         * <ul>
         *   <li>No line break: {@code WS_s<spaces>t<tabs>}.</li>
         *   <li>Exactly one line break: the space/tab counts are compared with the counts stored
         *       from the previous such call, and the stored counts are updated. The result is
         *       {@code WS_INDENTs<ds>t<dt>} when neither delta is negative, otherwise
         *       {@code WS_DEDENTs<-ds>t<-dt>}. If only one delta is negative, the other is
         *       printed negated too.</li>
         *   <li>Several line breaks: the string is cut at each line break and each piece is
         *       converted recursively.</li>
         * </ul>
         *
         * @param token the whitespace text
         * @return the whitespace symbols, in order
         */
        public List<String> toWhiteSpaceSymbol(final String token) {
            final List<String> symbols = Lists.newArrayList();
            final String text = token.replace("\r", "");
            int spaces = 0;
            int tabs = 0;
            int lineBreaks = 0;
            for (final char c : text.toCharArray()) {
                if (c == '\n') {
                    lineBreaks++;
                } else if (c == '\t') {
                    tabs++;
                } else if (c == ' ') {
                    spaces++;
                }
            }

            if (lineBreaks == 0) {
                symbols.add("WS_s" + spaces + "t" + tabs);
            } else if (lineBreaks == 1) {
                final int spaceDelta = spaces - this.currentIdentationSpaces;
                final int tabDelta = tabs - this.currentIdentationTabs;
                this.currentIdentationSpaces = spaces;
                this.currentIdentationTabs = tabs;
                if (spaceDelta < 0 || tabDelta < 0) {
                    symbols.add("WS_DEDENTs" + (-spaceDelta) + "t" + (-tabDelta));
                } else {
                    symbols.add("WS_INDENTs" + spaceDelta + "t" + tabDelta);
                }
            } else {
                int lineBreakPos = text.indexOf('\n');
                if (lineBreakPos > 0) {
                    // Whitespace preceding the first line break.
                    symbols.addAll(toWhiteSpaceSymbol(text.substring(0, lineBreakPos)));
                }
                // Each piece starts at a line break and runs up to (excluding) the next one.
                int nextLineBreakPos = text.indexOf('\n', lineBreakPos + 1);
                while (nextLineBreakPos != -1) {
                    symbols.addAll(toWhiteSpaceSymbol(text.substring(lineBreakPos, nextLineBreakPos)));
                    lineBreakPos = nextLineBreakPos;
                    nextLineBreakPos = text.indexOf('\n', lineBreakPos + 1);
                }
                // NOTE(review): a final line break with nothing after it yields no symbol here;
                // confirm this is intended before changing it.
                if (lineBreakPos + 1 < text.length()) {
                    symbols.addAll(toWhiteSpaceSymbol(text.substring(lineBreakPos)));
                }
            }
            return symbols;
        }
    }

    /** {@inheritDoc} */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, ITokenizer.FullToken> fullTokenListWithPos(final char[] code) {
        return new TokenizerImplementation().fullTokenListWithPos(code);
    }

    /** @return a filter accepting file names that end in {@code .java} */
    @Override // codemining.languagetools.ITokenizer
    public AbstractFileFilter getFileFilter() {
        return new TokenizerImplementation().getFileFilter();
    }

    /** @return the empty string */
    @Override // codemining.languagetools.ITokenizer
    public String getIdentifierType() {
        return new TokenizerImplementation().getIdentifierType();
    }

    /**
     * @param token the token text
     * @return a {@code FullToken} carrying {@code token} with an empty token type
     */
    @Override // codemining.languagetools.ITokenizer
    public ITokenizer.FullToken getTokenFromString(final String token) {
        return new TokenizerImplementation().getTokenFromString(token);
    }

    /**
     * Tokenizes {@code code} into {@code FullToken}s, bracketed by the sentence start and end
     * markers.
     *
     * @param code the source code to tokenize
     * @return the token sequence
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(final char[] code) {
        return new TokenizerImplementation().getTokenListFromCode(code);
    }

    /**
     * Tokenizes {@code code} and annotates each symbol with an offset from the start of its line.
     *
     * @param code the source code to tokenize
     * @return the annotated symbols, bracketed by sentence start/end markers of width 0
     */
    public List<AnnotatedToken> getTokensWithWidthData(final char[] code) {
        return new TokenizerImplementation().getTokensWithWidthData(code);
    }

    /**
     * Tokenizes {@code code} into a flat list of symbols, bracketed by the sentence start and end
     * markers.
     *
     * @param code the source code to tokenize
     * @return the symbol sequence
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(final char[] code) {
        return new TokenizerImplementation().tokenListFromCode(code);
    }

    /**
     * Tokenizes {@code code} into symbols keyed by position, with the sentence start marker at
     * key -1 and the end marker at {@code Integer.MAX_VALUE}.
     *
     * @param code the source code to tokenize
     * @return the symbols keyed by position
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, String> tokenListWithPos(final char[] code) {
        return new TokenizerImplementation().tokenListWithPos(code);
    }
}
