package codemining.cpp.codeutils;

import codemining.languagetools.ITokenizer;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.AbstractFileFilter;
import org.apache.commons.io.filefilter.RegexFileFilter;
import org.apache.commons.lang.NotImplementedException;
import org.eclipse.cdt.internal.formatter.scanner.Scanner;

/* loaded from: input_file:codemining/cpp/codeutils/CDTTokenizer.class */
/**
 * An {@link ITokenizer} that splits C/C++ source text into tokens using the
 * Eclipse CDT formatter {@link Scanner}.
 *
 * <p>Every token stream produced by this class is framed by
 * {@link ITokenizer#SENTENCE_START} and {@link ITokenizer#SENTENCE_END}.
 * Token types are reported as the decimal string form of the integer type
 * code returned by {@link Scanner#getNextToken()}.
 */
public class CDTTokenizer implements ITokenizer {
    private static final long serialVersionUID = 3954406410244227404L;

    /** Accepts file names ending in {@code .c}, {@code .cc}, {@code .cpp} or {@code .h}. */
    public static final RegexFileFilter C_CODE_TOKENIZER = new RegexFileFilter(".*\\.(c|cc|cpp|h)$");

    /**
     * Scanner token type that is dropped from every token stream.
     * NOTE(review): presumably CDT's whitespace token type; confirm against
     * the scanner's Token constants.
     */
    private static final int SKIPPED_TOKEN_TYPE = 1000;

    /** Scanner token type code reported by {@link #getIdentifierType()}. */
    private static final int IDENTIFIER_TOKEN_TYPE = 1;

    /** One token as read from the scanner, before conversion to an output form. */
    private static final class RawToken {
        /** Value of {@code Scanner.getCurrentPosition()} right after the token was fetched. */
        final int position;
        /** Source text of the token. */
        final String text;
        /** Integer type code returned by the scanner for the token. */
        final int type;

        RawToken(int position, String text, int type) {
            this.position = position;
            this.text = text;
            this.type = type;
        }

        /** Builds the public token form: token text plus the type code as a decimal string. */
        ITokenizer.FullToken toFullToken() {
            return new ITokenizer.FullToken(text, Integer.toString(type));
        }
    }

    /**
     * Runs the CDT scanner over {@code code} and collects every token whose
     * type is not {@link #SKIPPED_TOKEN_TYPE}, in scan order.
     *
     * <p>The scanner is queried at least once, even for empty input, and
     * scanning stops as soon as {@code Scanner.atEnd()} reports true.
     */
    private static List<RawToken> scan(char[] code) {
        List<RawToken> scanned = Lists.newArrayList();
        Scanner scanner = new Scanner();
        scanner.setSource(code);
        do {
            int type = scanner.getNextToken();
            if (type != SKIPPED_TOKEN_TYPE) {
                // The position is read after the token has been fetched.
                // NOTE(review): presumably this is the offset just past the token; confirm.
                int position = scanner.getCurrentPosition();
                scanned.add(new RawToken(position, new String(scanner.getCurrentTokenSource()), type));
            }
        } while (!scanner.atEnd());
        return scanned;
    }

    /** Creates a fresh sentence-start marker token. */
    private static ITokenizer.FullToken sentenceStart() {
        return new ITokenizer.FullToken(ITokenizer.SENTENCE_START, ITokenizer.SENTENCE_START);
    }

    /** Creates a fresh sentence-end marker token. */
    private static ITokenizer.FullToken sentenceEnd() {
        return new ITokenizer.FullToken(ITokenizer.SENTENCE_END, ITokenizer.SENTENCE_END);
    }

    /**
     * Tokenizes {@code cArr} into full tokens keyed by scanner position.
     *
     * @param cArr the source code to tokenize
     * @return a sorted map from position to token; the start marker is stored
     *         under key {@code -1} and the end marker under
     *         {@code Integer.MAX_VALUE}
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, ITokenizer.FullToken> fullTokenListWithPos(char[] cArr) {
        SortedMap<Integer, ITokenizer.FullToken> tokensByPosition = Maps.newTreeMap();
        tokensByPosition.put(-1, sentenceStart());
        tokensByPosition.put(Integer.MAX_VALUE, sentenceEnd());
        for (RawToken raw : scan(cArr)) {
            tokensByPosition.put(Integer.valueOf(raw.position), raw.toFullToken());
        }
        return tokensByPosition;
    }

    /**
     * @return the filter selecting the files this tokenizer handles,
     *         {@link #C_CODE_TOKENIZER}
     */
    @Override // codemining.languagetools.ITokenizer
    public AbstractFileFilter getFileFilter() {
        return C_CODE_TOKENIZER;
    }

    /** @return the token type string used for identifiers */
    @Override // codemining.languagetools.ITokenizer
    public String getIdentifierType() {
        return Integer.toString(IDENTIFIER_TOKEN_TYPE);
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws NotImplementedException always
     */
    @Override // codemining.languagetools.ITokenizer
    public Collection<String> getKeywordTypes() {
        throw new NotImplementedException();
    }

    /**
     * Not supported by this tokenizer.
     *
     * @throws NotImplementedException always
     */
    @Override // codemining.languagetools.ITokenizer
    public Collection<String> getLiteralTypes() {
        throw new NotImplementedException();
    }

    /**
     * Converts a single token string into a full token.
     *
     * <p>The sentence markers map to their marker tokens. Any other string is
     * tokenized as code and the element at index 1 of the result (the first
     * entry after the start marker) is returned.
     *
     * @param str the token text
     * @return the corresponding full token
     */
    @Override // codemining.languagetools.ITokenizer
    public ITokenizer.FullToken getTokenFromString(String str) {
        if (str.equals(ITokenizer.SENTENCE_START)) {
            return sentenceStart();
        }
        if (str.equals(ITokenizer.SENTENCE_END)) {
            return sentenceEnd();
        }
        return getTokenListFromCode(str.toCharArray()).get(1);
    }

    /**
     * Tokenizes {@code cArr} into an ordered list of full tokens.
     *
     * @param cArr the source code to tokenize
     * @return the tokens in scan order, preceded by the start marker and
     *         followed by the end marker
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(char[] cArr) {
        List<ITokenizer.FullToken> tokens = Lists.newArrayList();
        tokens.add(sentenceStart());
        for (RawToken raw : scan(cArr)) {
            tokens.add(raw.toFullToken());
        }
        tokens.add(sentenceEnd());
        return tokens;
    }

    /**
     * Reads {@code file} and tokenizes its content into full tokens.
     *
     * <p>No explicit charset is passed to {@code FileUtils.readFileToString},
     * so decoding is left to that method's default.
     *
     * @param file the source file to read
     * @return the tokens, framed by the sentence markers
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public List<ITokenizer.FullToken> getTokenListFromCode(File file) throws IOException {
        return getTokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /**
     * Tokenizes {@code cArr} into an ordered list of token texts.
     *
     * @param cArr the source code to tokenize
     * @return the token texts in scan order, preceded by
     *         {@link ITokenizer#SENTENCE_START} and followed by
     *         {@link ITokenizer#SENTENCE_END}
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(char[] cArr) {
        List<String> texts = Lists.newArrayList();
        texts.add(ITokenizer.SENTENCE_START);
        for (RawToken raw : scan(cArr)) {
            texts.add(raw.text);
        }
        texts.add(ITokenizer.SENTENCE_END);
        return texts;
    }

    /**
     * Reads {@code file} and tokenizes its content into token texts.
     *
     * <p>No explicit charset is passed to {@code FileUtils.readFileToString},
     * so decoding is left to that method's default.
     *
     * @param file the source file to read
     * @return the token texts, framed by the sentence markers
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public List<String> tokenListFromCode(File file) throws IOException {
        return tokenListFromCode(FileUtils.readFileToString(file).toCharArray());
    }

    /**
     * Tokenizes {@code cArr} into token texts keyed by scanner position.
     *
     * @param cArr the source code to tokenize
     * @return a sorted map from position to token text; the start marker is
     *         stored under key {@code -1} and the end marker under
     *         {@code Integer.MAX_VALUE}
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, String> tokenListWithPos(char[] cArr) {
        SortedMap<Integer, String> textsByPosition = Maps.newTreeMap();
        textsByPosition.put(-1, ITokenizer.SENTENCE_START);
        textsByPosition.put(Integer.MAX_VALUE, ITokenizer.SENTENCE_END);
        for (RawToken raw : scan(cArr)) {
            textsByPosition.put(Integer.valueOf(raw.position), raw.text);
        }
        return textsByPosition;
    }

    /**
     * Reads {@code file} and tokenizes its content into full tokens keyed by
     * scanner position.
     *
     * <p>Unlike {@link #tokenListWithPos(char[])}, which maps to plain token
     * texts, this overload maps to full tokens: it delegates to
     * {@link #fullTokenListWithPos(char[])}. No explicit charset is passed to
     * {@code FileUtils.readFileToString}.
     *
     * @param file the source file to read
     * @return a sorted map from position to full token, including the
     *         sentence markers
     * @throws IOException if the file cannot be read
     */
    @Override // codemining.languagetools.ITokenizer
    public SortedMap<Integer, ITokenizer.FullToken> tokenListWithPos(File file) throws IOException {
        return fullTokenListWithPos(FileUtils.readFileToString(file).toCharArray());
    }
}
