package cc.mallet.share.upenn.ner;

import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.tsf.RegexMatches;
import cc.mallet.pipe.tsf.TokenText;
import java.io.File;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.eclipse.jdt.core.search.IJavaSearchScope;

/* loaded from: input_file:cc/mallet/share/upenn/ner/NEPipes.class */
/**
 * A {@link SerialPipes} that chains a fixed set of token-level feature pipes for
 * named-entity recognition: the token text, single- and multi-token regular-expression
 * matchers (capitalisation, punctuation, numbers, money, times, phone numbers, dates),
 * membership tests against place-name lists, token length bins and a feature window.
 *
 * <p>The regular-expression fragments below are plain strings that are concatenated into
 * larger expressions. Several of them contain the literal placeholders {@code <DIGITS>},
 * {@code <YEAR>} and {@code <YEARDECADE>}; presumably an earlier processing step rewrites
 * numeric tokens to these placeholders (TODO confirm against the tokenizer used upstream).
 *
 * <p>The fragments are built from plain {@code "("}, {@code ")"} and {@code "|"} literals,
 * so they do not depend on constants from unrelated libraries.
 */
public class NEPipes extends SerialPipes {
    // ----- capitalisation and character classes -----
    static String ALLCAPS = "([A-Z]*)";
    static String ALLLOWER = "([a-z]*)";
    static String INITCAPS = "([A-Z].*)";
    static String MIXEDCASE = "(.*[a-z].*[A-Z].*)";
    static String MIXEDNUM = "(.*[0-9].*)";

    // ----- punctuation -----
    static String ENDSENTENCE = "([.!?])";
    static String PUNCTUATION = "([:;,.!?-])";
    static String BRACKET = "([(){}\\[\\]])";
    // Includes "nd" so that "2nd", "22nd", ... are recognised alongside "1st", "3rd", "4th".
    static String ORDINAL = "([0-9]+(st|nd|rd|th))";
    static String QUOTED = "([\"'].*[\"'])";
    static String BRACKETED = "([({\\[].*[)}\\]])";
    static String INITIAL = "([A-Z][.])";
    static String DOTS = "([.][.])";
    static String DASHES = "(--)";

    // ----- numbers and money -----
    static String FRACTION = "(<DIGITS>/<DIGITS>)";
    static String DOTDECIMAL = "((<DIGITS>)?[.]<DIGITS>)";
    static String DECIMAL = "(<DIGITS>|" + DOTDECIMAL + ")";
    static String NUMBER_WORD = "(zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|ninety|hundred|thousand|million|billion|trillion)";
    static String CURRENCY = "(dollar(s)?|cent(s)?|pound(s)?|euro(s)?|franc(s)?|yen)";
    static String MONEYWORDS = "(" + NUMBER_WORD + "+" + CURRENCY + ")";
    static String COMMA_DECIMAL = "((<DIGITS>,)+<DIGITS>([.]<DIGITS>)?)";
    static String ILLION = "(m(illion)?|b(illion)?|MM|k)";
    static String MIXED_ILLION = "([0-9]+" + ILLION + ")";
    static String RANGE = "(" + DECIMAL + "-" + DECIMAL + ")";

    // ----- times -----
    static String TIMENUM = "(<DIGITS>:<DIGITS>(:<DIGITS>)?)";
    static String AMPM = "(am|a[.]m[.]|pm|p[.]m[.])";
    static String MIXED_AMPM = "([0-9]+" + AMPM + ")";
    static String TIMEZONES = "(PST|PDT|MST|MDT|CST|CDT|EST|EDT|UTC|GMT)";
    static String SPECIALTIME = "(noon|midnight)";
    static String TIME = "((" + TIMENUM + AMPM + "?|(<DIGITS>)" + AMPM + "|" + "(<DIGITS>:)?" + MIXED_AMPM + ")" + TIMEZONES + "?|" + SPECIALTIME + ")";
    static String TIMERANGE = "(" + TIME + "(-|to|until)" + TIME + ")";

    // ----- phone numbers -----
    static String P10 = "(([(]?<DIGITS>[)]?[-]?)<DIGITS>[-]?<DIGITS>|<DIGITS>[.]<DIGITS>[.]<DIGITS>)";
    static String P5 = "(<DIGITS>[-]<DIGITS>)";

    // ----- dates -----
    // Includes "nd" so that day ordinals such as "2nd" and "22nd" are accepted.
    static String DAY = "(<DIGITS>|[1-3]?[0-9](st|nd|rd|th))";
    static String YEAR = "(<YEAR>)";
    static String DECADE = "(<YEARDECADE>)";
    static String MONTHNAME = "(January|February|March|April|May|June|July|August|September|October|November|December)";
    static String MONTHABBR = "((Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)[.]?)";
    static String MONTH = "(" + MONTHNAME + "|" + MONTHABBR + ")";
    static String WEEKDAYNAME = "(Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)";
    static String WEEKDAYABBR = "((Sun|Mon|Tue|Tues|Wed|Thu|Thur|Thurs|Fri|Sat)[.]?)";
    static String WEEKDAY = "(" + WEEKDAYNAME + "|" + WEEKDAYABBR + ")";
    static String MONTHDAY = "(" + MONTH + DAY + ")";
    static String DAYMONTHDAY = "(" + WEEKDAY + "[,]?" + MONTHDAY + ")";
    static String MONTHYEAR = "(" + MONTH + "[,]?" + YEAR + ")";
    static String MONTHDAYYEAR = "(" + MONTHDAY + "[,]?" + YEAR + ")";
    static String DAYMONTHDAYYEAR = "(" + DAYMONTHDAY + "[,]?" + YEAR + ")";
    static String SEP = "([-/])";
    static String SEPDATE = "(<DIGITS>" + SEP + "<DIGITS>(" + SEP + "(<DIGITS>|" + YEAR + "))?)";
    static String FULLSEPDATE = "(<DIGITS>" + SEP + "<DIGITS>" + SEP + "(<DIGITS>|" + YEAR + "))";

    /**
     * Creates the feature pipeline.
     *
     * @param file directory used as the parent of the list files handed to the
     *             {@code ListMember} pipes ({@code countries.txt}, {@code africa.txt},
     *             {@code asia.txt}, {@code europe.txt}, {@code north_america.txt},
     *             {@code south_america.txt}, {@code islands.txt}, {@code regions.txt},
     *             {@code states.txt}, {@code provinces.txt}, {@code cities.txt},
     *             {@code us_cities.txt}, {@code terrain.txt}, {@code geo.txt})
     */
    public NEPipes(File file) {
        super(buildPipes(file));
    }

    /**
     * Assembles the pipes in the order in which they are applied.
     *
     * <p>The two trailing integers passed to each {@code LongRegexMatches} are forwarded
     * unchanged from the original pipeline; they look like the minimum and maximum number
     * of consecutive tokens the pattern may span (NOTE(review): confirm against
     * {@code LongRegexMatches}). Likewise, the boolean passed to {@code ListMember} is
     * forwarded as-is; its meaning is defined by that class.
     */
    private static Pipe[] buildPipes(File file) {
        final int ci = Pattern.CASE_INSENSITIVE;
        return new Pipe[]{
            new TokenText("text="),

            // capitalisation, digits and punctuation
            new RegexMatches("SingleLetter", Pattern.compile("[A-Za-z]")),
            new RegexMatches("AllCaps", Pattern.compile(ALLCAPS)),
            new RegexMatches("AllLower", Pattern.compile(ALLLOWER)),
            new RegexMatches("InitCaps", Pattern.compile(INITCAPS)),
            new RegexMatches("MixedCase", Pattern.compile(MIXEDCASE)),
            new RegexMatches("MixedNum", Pattern.compile(MIXEDNUM)),
            new RegexMatches("EndSentPunc", Pattern.compile(ENDSENTENCE)),
            new RegexMatches("Punc", Pattern.compile(PUNCTUATION)),
            new RegexMatches("Bracket", Pattern.compile(BRACKET)),
            new RegexMatches("Ordinal", Pattern.compile(ORDINAL, ci)),
            new LongRegexMatches("Quoted", Pattern.compile(QUOTED), 3, 4),
            new LongRegexMatches("Bracketed", Pattern.compile(BRACKETED), 3, 4),
            new LongRegexMatches("Initial", Pattern.compile(INITIAL), 2, 2),
            new LongRegexMatches("Ellipse", Pattern.compile(DOTS), 2, 2),
            new LongRegexMatches("Dashes", Pattern.compile(DASHES), 2, 2),

            // numbers and money
            new LongRegexMatches("Fraction", Pattern.compile(FRACTION), 3, 3),
            new LongRegexMatches("DotDecimal", Pattern.compile(DOTDECIMAL), 2, 3),
            new LongRegexMatches("Percent", Pattern.compile("(" + RANGE + "|" + DECIMAL + ")%"), 2, 4),
            new RegexMatches("10^3n", Pattern.compile(ILLION, ci)),
            new LongRegexMatches("Numeric", Pattern.compile(DECIMAL), 1, 3),
            new LongRegexMatches("BigNumber", Pattern.compile(COMMA_DECIMAL), 3, 7),
            new LongRegexMatches("kmbNumber", Pattern.compile(DECIMAL + ILLION, ci), 1, 4),
            new RegexMatches("kmbMixed", Pattern.compile(MIXED_ILLION, ci)),
            new LongRegexMatches(
                "Dollars",
                Pattern.compile(
                    "[$](" + RANGE + "|" + DECIMAL + "|" + COMMA_DECIMAL + "|" + DECIMAL + ILLION + "|" + MIXED_ILLION + ")",
                    ci),
                2, 8),
            new RegexMatches("NumberWord", Pattern.compile(NUMBER_WORD, ci)),
            new RegexMatches("Currency", Pattern.compile(CURRENCY, ci)),
            new LongRegexMatches("MoneyWords", Pattern.compile(MONEYWORDS, ci), 2, 4),

            // times
            new LongRegexMatches("AmPm", Pattern.compile(AMPM, ci), 1, 4),
            new RegexMatches("MixedAmPm", Pattern.compile(MIXED_AMPM, ci)),
            new LongRegexMatches("TimeNum", Pattern.compile(TIMENUM), 3, 5),
            new RegexMatches("TimeZone", Pattern.compile(TIMEZONES, ci)),
            new LongRegexMatches("Time", Pattern.compile(TIME, ci), 1, 9),
            new LongRegexMatches("TimeRange", Pattern.compile(TIMERANGE, ci), 3, 19),

            // phone numbers
            new LongRegexMatches("P10", Pattern.compile(P10), 3, 7),
            // Uses P5 (previously compiled from P10 by mistake, so the "P5" feature
            // never tested the <DIGITS>-<DIGITS> pattern it is named after).
            new LongRegexMatches("P5", Pattern.compile(P5), 3, 3),
            new LongRegexMatches("Phone", Pattern.compile(P10 + "|" + P5), 3, 7),

            // months and weekdays
            new RegexMatches("UncasedMonthName", Pattern.compile(MONTHNAME, ci)),
            new LongRegexMatches("UncasedMonthAbbr", Pattern.compile(MONTHABBR, ci), 1, 2),
            new LongRegexMatches("CasedMonth", Pattern.compile(MONTH), 1, 2),
            new LongRegexMatches("UncasedMonth", Pattern.compile(MONTH, ci), 1, 2),
            new RegexMatches("UncasedWeekdayName", Pattern.compile(WEEKDAYNAME, ci)),
            new LongRegexMatches("UncasedWeekdayAbbr", Pattern.compile(WEEKDAYABBR, ci), 1, 2),
            new LongRegexMatches("CasedWeekday", Pattern.compile(WEEKDAY), 1, 2),
            new LongRegexMatches("UncasedWeekday", Pattern.compile(WEEKDAY, ci), 1, 2),

            // composite dates
            new LongRegexMatches("MonthDay", Pattern.compile(MONTHDAY, ci), 2, 3),
            new LongRegexMatches("DayMonthDay", Pattern.compile(DAYMONTHDAY, ci), 3, 6),
            new LongRegexMatches("MonthYear", Pattern.compile(MONTHYEAR, ci), 2, 4),
            new LongRegexMatches("MonthDayYear", Pattern.compile(MONTHDAYYEAR, ci), 3, 5),
            new LongRegexMatches("DayMonthDayYear", Pattern.compile(DAYMONTHDAYYEAR, ci), 4, 8),
            new LongRegexMatches("SeparatorDate", Pattern.compile(SEPDATE), 3, 5),
            new LongRegexMatches("FullSeparatorDate", Pattern.compile(FULLSEPDATE), 5, 5),

            // place-name lists, resolved relative to the supplied directory
            new ListMember("Country", new File(file, "countries.txt"), false),
            new ListMember("Africa", new File(file, "africa.txt"), true),
            new ListMember("Asia", new File(file, "asia.txt"), true),
            new ListMember("Europe", new File(file, "europe.txt"), true),
            new ListMember("NorAm", new File(file, "north_america.txt"), true),
            new ListMember("SouAm", new File(file, "south_america.txt"), true),
            new ListMember("Island", new File(file, "islands.txt"), true),
            new ListMember("Region", new File(file, "regions.txt"), true),
            new ListMember("USState", new File(file, "states.txt"), true),
            new ListMember("CanadaProv", new File(file, "provinces.txt"), true),
            new ListMember("City", new File(file, "cities.txt"), true),
            new ListMember("USCity", new File(file, "us_cities.txt"), true),
            new ListMember("Terrain", new File(file, "terrain.txt"), true),
            new ListMember("Geographical", new File(file, "geo.txt"), true),

            // length bins and neighbouring-token window
            new LengthBins("Length", new int[]{1, 2, 3, 5, 10}),
            new FeatureWindow(1, 1)
        };
    }
}
