package edu.stanford.nlp.international.arabic;

import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/international/arabic/Buckwalter.class */
/**
 * Converts text between Arabic Unicode characters and their Buckwalter ASCII
 * transliteration, one character at a time.
 *
 * <p>The direction used by {@link #apply(String)} is fixed at construction time
 * (Buckwalter -> Unicode by default); {@link #buckwalterToUnicode(String)} and
 * {@link #unicodeToBuckwalter(String)} select a direction explicitly.
 *
 * <p>Input is split on whitespace with a {@link StringTokenizer}; converted tokens are
 * re-joined with single spaces and the result is trimmed, so the original whitespace
 * layout is not preserved. Tokens contained in {@code ATBTreeUtils.reservedWords} are
 * copied through verbatim. Characters without a mapping are copied through unchanged.
 *
 * <p>NOTE: the two "suppress" switches and the debug flag are {@code static}, so changing
 * them through one instance affects every instance in the JVM.
 */
public class Buckwalter implements SerializableFunction<String, String> {
    private static final long serialVersionUID = 4351710914246859336L;

    /**
     * When true, every character that has a mapping is emitted as a {@code \}{@code uXXXX}
     * escape instead of as the mapped character.
     */
    boolean outputUnicodeValues;

    /** Direction used by {@link #apply(String)}: true = Unicode -> Buckwalter. */
    private boolean unicode2Buckwalter;

    /** Unicode character -> Buckwalter character. */
    private final Map<Character, Character> u2bMap;

    /** Buckwalter character -> Unicode character. */
    private final Map<Character, Character> b2uMap;

    /** Counts characters that had no mapping; only populated while {@link #DEBUG} is true. */
    private ClassicCounter<String> unmappable;

    /** When true, characters below code point 127 are passed through unchanged in Unicode -> Buckwalter mode. */
    private static final boolean PASS_ASCII_IN_UNICODE = true;

    private static Redwood.RedwoodChannels log = Redwood.channels(Buckwalter.class);

    /*
     * Parallel tables: arabicChars[i] corresponds to buckChars[i]. Both maps are filled in
     * array order, so where a key occurs more than once the LAST occurrence wins:
     *   - 1573, 1571 and 1572 appear twice, so in u2bMap they end up mapped to 'I', 'O', 'W'
     *     (while '<', '>', '&' still map back to them in b2uMap);
     *   - the ASCII digits appear twice, so in b2uMap they end up mapped to 1632..1641;
     *   - '"' appears twice, so in b2uMap it ends up mapped to 187.
     */
    private static final char[] arabicChars = {1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1648, 1649, 1662, 1670, 1688, 1700, 1711, 1573, 1571, 1572, 1548, 1563, 1567, 1642, 1643, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 171, 187};
    private static final char[] buckChars = {'\'', '|', '>', '&', '<', '}', 'A', 'b', 'p', 't', 'v', 'j', 'H', 'x', 'd', '*', 'r', 'z', 's', '$', 'S', 'D', 'T', 'Z', 'E', 'g', '_', 'f', 'q', 'k', 'l', 'm', 'n', 'h', 'w', 'Y', 'y', 'F', 'N', 'K', 'a', 'u', 'i', '~', 'o', '`', '{', 'P', 'J', 'R', 'V', 'G', 'I', 'O', 'W', ',', ';', '?', '%', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '\"', '\"'};

    private static boolean DEBUG = false;

    /** When true, digits are left untouched in Buckwalter -> Unicode mode. */
    private static boolean SUPPRESS_DIGIT_MAPPING_IN_B2A = true;

    /** When true, characters matched by {@link #latinPunc} are left untouched in Buckwalter -> Unicode mode. */
    private static boolean SUPPRESS_PUNC_MAPPING_IN_B2A = true;

    /*
     * Latin punctuation that is protected from Buckwalter -> Unicode mapping.
     * The hyphen is escaped on purpose: the previous spelling ",-;" was parsed as the RANGE
     * U+002C..U+003B, which silently included the digits 0-9 and therefore made the digit
     * switch ineffective whenever punctuation suppression was on. The non-digit members of
     * that range (, - . / : ;) are listed explicitly so their handling is unchanged.
     */
    private static final Pattern latinPunc = Pattern.compile("[\"\\?%,\\-;\\._/:]+");

    private static final StringBuilder usage = new StringBuilder();

    /**
     * Creates a converter whose {@link #apply(String)} direction is Buckwalter -> Unicode.
     *
     * @throws RuntimeException if the two character tables differ in length
     */
    public Buckwalter() {
        this.outputUnicodeValues = false;
        this.unicode2Buckwalter = false;
        if (arabicChars.length != buckChars.length) {
            throw new RuntimeException(getClass().getName() + ": Inconsistent u2b/b2u arrays.");
        }
        this.u2bMap = Generics.newHashMap(arabicChars.length);
        this.b2uMap = Generics.newHashMap(buckChars.length);
        for (int i = 0; i < arabicChars.length; i++) {
            Character arabic = Character.valueOf(arabicChars[i]);
            Character buck = Character.valueOf(buckChars[i]);
            // Later table entries overwrite earlier ones that share a key.
            this.u2bMap.put(arabic, buck);
            this.b2uMap.put(buck, arabic);
        }
        if (DEBUG) {
            this.unmappable = new ClassicCounter<>();
        }
    }

    /**
     * Creates a converter with an explicit default direction.
     *
     * @param z true for Unicode -> Buckwalter, false for Buckwalter -> Unicode
     */
    public Buckwalter(boolean z) {
        this();
        this.unicode2Buckwalter = z;
    }

    /**
     * Sets whether digits are left untouched in Buckwalter -> Unicode mode.
     * Writes a static flag, so the setting applies to all instances.
     *
     * @param z true to leave digits as they are
     */
    public void suppressBuckDigitConversion(boolean z) {
        SUPPRESS_DIGIT_MAPPING_IN_B2A = z;
    }

    /**
     * Sets whether Latin punctuation is left untouched in Buckwalter -> Unicode mode.
     * Writes a static flag, so the setting applies to all instances.
     *
     * @param z true to leave Latin punctuation as it is
     */
    public void suppressBuckPunctConversion(boolean z) {
        SUPPRESS_PUNC_MAPPING_IN_B2A = z;
    }

    /**
     * Converts {@code str} in the direction chosen at construction time.
     *
     * @param str text to convert; must not be null
     * @return the converted text, tokens separated by single spaces
     */
    @Override // java.util.function.Function
    public String apply(String str) {
        return convert(str, this.unicode2Buckwalter);
    }

    /**
     * Converts Buckwalter-encoded text to Unicode.
     *
     * @param str text to convert; must not be null
     * @return the converted text, tokens separated by single spaces
     */
    public String buckwalterToUnicode(String str) {
        return convert(str, false);
    }

    /**
     * Converts Unicode text to its Buckwalter encoding.
     *
     * @param str text to convert; must not be null
     * @return the converted text, tokens separated by single spaces
     */
    public String unicodeToBuckwalter(String str) {
        return convert(str, true);
    }

    /**
     * Tokenizes {@code str} on whitespace, converts each token and joins the results with
     * single spaces.
     *
     * @param str text to convert
     * @param z true for Unicode -> Buckwalter, false for Buckwalter -> Unicode
     * @return the trimmed, space-joined conversion result
     */
    private String convert(String str, boolean z) {
        StringTokenizer tokenizer = new StringTokenizer(str);
        StringBuilder result = new StringBuilder(str.length());
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            // The reserved-word test depends only on the token, so do it once per token
            // rather than once per character.
            if (ATBTreeUtils.reservedWords.contains(token)) {
                result.append(token);
            } else {
                for (int i = 0; i < token.length(); i++) {
                    appendConverted(result, Character.valueOf(token.charAt(i)), z);
                }
            }
            result.append(' ');
        }
        return result.toString().trim();
    }

    /** Appends the conversion of a single character to {@code out}. */
    private void appendConverted(StringBuilder out, Character in, boolean toBuckwalter) {
        Character mapped = lookup(in, toBuckwalter);
        if (mapped == null) {
            if (DEBUG) {
                // The counter is only created by the constructor when DEBUG was already on;
                // create it lazily so a later toggle or a deserialized instance cannot NPE.
                if (this.unmappable == null) {
                    this.unmappable = new ClassicCounter<>();
                }
                this.unmappable.incrementCount(in + "[U+" + hex4(in.charValue()) + ']');
            }
            // No mapping: pass the character through unchanged.
            out.append(in);
        } else if (this.outputUnicodeValues) {
            // NOTE(review): this prints the code point of the INPUT character, not of the
            // mapped one. Kept as found; confirm whether the mapped value was intended.
            out.append("\\u").append(hex4(in.charValue()));
        } else {
            out.append(mapped);
        }
    }

    /**
     * Returns the character {@code in} converts to, {@code in} itself when it is protected
     * from conversion, or null when no mapping exists.
     */
    private Character lookup(Character in, boolean toBuckwalter) {
        char c = in.charValue();
        if (toBuckwalter) {
            return (PASS_ASCII_IN_UNICODE && c < 127) ? in : this.u2bMap.get(in);
        }
        if (SUPPRESS_DIGIT_MAPPING_IN_B2A && Character.isDigit(c)) {
            return in;
        }
        if (SUPPRESS_PUNC_MAPPING_IN_B2A && latinPunc.matcher(in.toString()).matches()) {
            return in;
        }
        return this.b2uMap.get(in);
    }

    /** Formats {@code c} as upper-case hexadecimal, left-padded with zeros to four digits. */
    private static String hex4(char c) {
        return StringUtils.padLeft(Integer.toString(c, 16).toUpperCase(), 4, '0');
    }

    /**
     * Command-line entry point.
     *
     * <p>The body of this method was not recovered by the decompiler that produced this
     * file, so calling it always fails.
     *
     * @param args command-line arguments (unused by this stub)
     * @throws UnsupportedOperationException always
     */
    public static void main(String[] args) {
        throw new UnsupportedOperationException("Method not decompiled: edu.stanford.nlp.international.arabic.Buckwalter.main(java.lang.String[]):void");
    }

    static {
        usage.append("Usage: java Buckwalter [OPTS] file   (or < file)\n");
        usage.append("Options:\n");
        usage.append("          -u2b : Unicode -> Buckwalter (default is Buckwalter -> Unicode).\n");
        usage.append("          -d   : Debug mode.\n");
        usage.append("          -o   : Output unicode values.\n");
    }
}
