package edu.stanford.nlp.trees.international.spanish;

import edu.stanford.nlp.io.ReaderInputStream;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasLemma;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.RuntimeInterruptedException;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.XMLUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import org.apache.commons.lang3.CharEncoding;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:edu/stanford/nlp/trees/international/spanish/SpanishXMLTreeReader.class */
/**
 * A {@link TreeReader} that builds trees from an XML document made of
 * {@code <sentence>} elements.
 *
 * <p>The whole document is parsed eagerly in the constructor; {@link #readTree()}
 * then walks the parsed sentences one by one. Elements carrying a {@code wd}
 * attribute and no children become preterminal/leaf pairs, elements carrying an
 * {@code elliptic} attribute become preterminals over an empty leaf, and every
 * other element becomes a constituent over its element children.
 *
 * <p>The class also offers a small command-line tool (see {@link #main(String[])})
 * that prints the trees, or the plain text, of a set of XML files.
 */
public class SpanishXMLTreeReader implements TreeReader {
    private static final Redwood.RedwoodChannels log = Redwood.channels(SpanishXMLTreeReader.class);

    /** Name of the XML element that roots a single sentence. */
    private static final String NODE_SENT = "sentence";

    // XML attribute names read from the treebank elements.
    private static final String ATTR_WORD = "wd";
    private static final String ATTR_LEMMA = "lem";
    private static final String ATTR_FUNC = "func";
    private static final String ATTR_NAMED_ENTITY = "ne";
    private static final String ATTR_POS = "pos";
    private static final String ATTR_POSTYPE = "postype";
    private static final String ATTR_ELLIPTIC = "elliptic";
    private static final String ATTR_PUNCT = "punct";
    private static final String ATTR_GENDER = "gen";
    private static final String ATTR_NUMBER = "num";
    private static final String ATTR_COORDINATING = "coord";
    private static final String ATTR_CLAUSE_TYPE = "clausetype";

    /** Byte stream over the input reader; set to {@code null} once closed. */
    private InputStream stream;
    private final TreeNormalizer treeNormalizer;
    private final TreeFactory treeFactory;
    private final boolean simplifiedTagset;
    private final boolean detailedAnnotations;

    /** Parsed sentence elements; stays {@code null} when parsing failed. */
    private NodeList sentences;
    /** Index of the next sentence element to convert. */
    private int sentIdx;

    /**
     * Parses the XML document supplied by {@code in} and prepares to iterate over its
     * {@code <sentence>} elements. Parse and I/O failures are reported but not thrown;
     * in that case {@link #readTree()} simply returns {@code null}.
     *
     * @param filename name of the source, used only in diagnostic messages
     * @param in reader supplying the XML document
     * @param simplifiedTagset enables the extra tag-guessing rules in this reader and is
     *        forwarded as the first argument of {@code SpanishTreeNormalizer}
     * @param aggressiveNormalization forwarded unchanged as the second argument of
     *        {@code SpanishTreeNormalizer} (NOTE(review): parameter name inferred - confirm)
     * @param retainNER forwarded unchanged as the third argument of
     *        {@code SpanishTreeNormalizer} (NOTE(review): parameter name inferred - confirm)
     * @param detailedAnnotations when true, constituent labels get a {@code -coord} or
     *        {@code -<clausetype>} suffix
     */
    public SpanishXMLTreeReader(String filename, Reader in, boolean simplifiedTagset, boolean aggressiveNormalization, boolean retainNER, boolean detailedAnnotations) {
        SpanishTreebankLanguagePack languagePack = new SpanishTreebankLanguagePack();
        this.simplifiedTagset = simplifiedTagset;
        this.detailedAnnotations = detailedAnnotations;
        this.stream = new ReaderInputStream(in, languagePack.getEncoding());
        this.treeFactory = new LabeledScoredTreeFactory();
        this.treeNormalizer = new SpanishTreeNormalizer(simplifiedTagset, aggressiveNormalization, retainNER);
        try {
            this.sentences = XMLUtils.getXmlParser().parse(this.stream).getDocumentElement().getElementsByTagName(NODE_SENT);
            this.sentIdx = 0;
        } catch (IOException e) {
            // Name the failing input, as the SAX branch below does.
            log.info("I/O exception while reading " + filename);
            e.printStackTrace();
        } catch (SAXException e) {
            log.info("Parse exception while reading " + filename);
            e.printStackTrace();
        }
    }

    /**
     * Closes the underlying input stream, if it is still open. Failures while closing
     * are ignored.
     */
    @Override // edu.stanford.nlp.trees.TreeReader, java.io.Closeable, java.lang.AutoCloseable
    public void close() {
        try {
            if (this.stream != null) {
                this.stream.close();
                this.stream = null;
            }
        } catch (IOException ignored) {
            // Deliberately best-effort: the document was fully parsed in the constructor,
            // so a failure to close the stream cannot affect the trees handed out.
        }
    }

    /**
     * Returns the next non-empty tree of the document, normalized by the tree normalizer.
     * Sentences that yield no tree are skipped. When the root label is a
     * {@link CoreLabel}, the zero-based index of the sentence element is stored on it
     * as {@code SentenceIDAnnotation}.
     *
     * @return the next tree, or {@code null} when no sentences remain or the document
     *         could not be parsed
     */
    @Override // edu.stanford.nlp.trees.TreeReader
    public Tree readTree() {
        Tree tree = null;
        while (tree == null && this.sentences != null && this.sentIdx < this.sentences.getLength()) {
            int sentenceId = this.sentIdx++;
            tree = getTreeFromXML(this.sentences.item(sentenceId));
            if (tree == null) {
                continue;
            }
            tree = this.treeNormalizer.normalizeWholeTree(tree, this.treeFactory);
            // Guard against a normalizer that returns no tree: move on to the next
            // sentence instead of dereferencing null.
            if (tree != null && tree.label() instanceof CoreLabel) {
                ((CoreLabel) tree.label()).set(CoreAnnotations.SentenceIDAnnotation.class, Integer.toString(sentenceId));
            }
        }
        return tree;
    }

    /** Returns true when the element has a word attribute and no child nodes. */
    private static boolean isWordNode(Element element) {
        return element.hasAttribute(ATTR_WORD) && !element.hasChildNodes();
    }

    /** Returns true when the element carries the elliptic attribute. */
    private static boolean isEllipticNode(Element element) {
        return element.hasAttribute(ATTR_ELLIPTIC);
    }

    /**
     * Determines the part-of-speech tag of a word element.
     *
     * <p>A seven-character {@code np...0} tag gets its last character replaced by a
     * code derived from the named-entity attribute ({@code l}, {@code p}, {@code o},
     * or {@code 0}). An empty tag is filled in by {@link #inferMissingPOS}. Any other
     * tag is returned unchanged.
     *
     * @param element the word element
     * @return the tag, possibly the empty string when none could be determined
     */
    private String getPOS(Element element) {
        String pos = element.getAttribute(ATTR_POS);
        String namedEntity = element.getAttribute(ATTR_NAMED_ENTITY);

        if (pos.startsWith("np") && pos.length() == 7 && pos.charAt(pos.length() - 1) == '0') {
            char entityCode;
            switch (namedEntity) {
                case "location":
                    entityCode = 'l';
                    break;
                case "person":
                    entityCode = 'p';
                    break;
                case "organization":
                    entityCode = 'o';
                    break;
                default:
                    entityCode = '0';
                    break;
            }
            return pos.substring(0, 6) + entityCode;
        }
        if (pos.isEmpty()) {
            return inferMissingPOS(element, namedEntity);
        }
        return pos;
    }

    /**
     * Guesses a tag for a word element that has no {@code pos} attribute, using the
     * word form, the named-entity attribute, the element name and the {@code postype}
     * attribute. The rules are tried in a fixed order and the first match wins.
     *
     * @param element the word element
     * @param namedEntity value of the element's named-entity attribute (may be empty)
     * @return the guessed tag, or the empty string when no rule applies
     */
    private String inferMissingPOS(Element element, String namedEntity) {
        String word = getWord(element);
        if (word.equals(".")) {
            return "fp";
        }
        if (namedEntity.equals("date")) {
            return "w";
        }
        if (namedEntity.equals("number")) {
            return "z0";
        }

        String tagName = element.getTagName();
        if (tagName.equals("i")) {
            return "i";
        }
        if (tagName.equals("r")) {
            return "rg";
        }
        if (tagName.equals("z")) {
            return "z0";
        }

        String posType = element.getAttribute(ATTR_POSTYPE);
        if (tagName.equals("c") && posType.equals("subordinating")) {
            return "cs";
        }
        if (tagName.equals("p") && posType.equals("relative") && word.equalsIgnoreCase("que")) {
            return "pr0cn000";
        }
        if (tagName.equals("s") && (word.equalsIgnoreCase("de") || word.equalsIgnoreCase("del") || word.equalsIgnoreCase("en"))) {
            return "sps00";
        }
        if (word.equals("REGRESA")) {
            return "vmip3s0";
        }

        if (this.simplifiedTagset) {
            String simplified = inferSimplifiedPOS(element, tagName, posType, word);
            if (simplified != null) {
                return simplified;
            }
        }

        if (element.hasAttribute(ATTR_PUNCT)) {
            switch (word) {
                case "\"":
                    return "fe";
                case "-":
                    return "fg";
                case "(":
                    return "fpa";
                case ")":
                    return "fpt";
                default:
                    // Includes the apostrophe, which the original also mapped to "fz".
                    return "fz";
            }
        }
        return "";
    }

    /**
     * Extra guessing rules that only apply when the simplified tagset is in use.
     *
     * @return the guessed tag, or {@code null} when none of these rules applies
     */
    private static String inferSimplifiedPOS(Element element, String tagName, String posType, String word) {
        if (word.equals("verme")) {
            return "vmn0000";
        }
        if (tagName.equals("a")) {
            return "aq0000";
        }
        if (posType.equals("proper")) {
            return "np00000";
        }
        if (posType.equals("common")) {
            return "nc0s000";
        }
        if (tagName.equals("d")) {
            if (posType.equals("numeral")) {
                return "dn0000";
            }
            if (posType.equals("article") || word.equalsIgnoreCase("el") || word.equalsIgnoreCase("la")) {
                return "da0000";
            }
        }
        if (tagName.equals("p")) {
            if (posType.equals("relative")) {
                return "pr000000";
            }
            if (posType.equals("personal")) {
                return "pp000000";
            }
            if (posType.equals("indefinite")) {
                return "pi000000";
            }
        }
        if (tagName.equals("s") && word.equalsIgnoreCase("como")) {
            return "sp000";
        }
        if (tagName.equals("n")) {
            // getAttribute yields "" (never null) for a missing attribute, so test for
            // emptiness before taking the first character.
            String gender = element.getAttribute(ATTR_GENDER);
            String number = element.getAttribute(ATTR_NUMBER);
            char genderCode = gender.isEmpty() ? '0' : gender.charAt(0);
            char numberCode = number.isEmpty() ? '0' : number.charAt(0);
            // Start with a String so that '+' concatenates; adding chars to each other
            // would sum their code points and emit a number instead of a tag. The order
            // of the pieces is exactly the one the original expression spelled out.
            return "n" + genderCode + '0' + numberCode + "000";
        }
        return null;
    }

    /**
     * Returns the trimmed word attribute of the element, or the normalizer's empty-leaf
     * value when the attribute is missing or empty.
     */
    private static String getWord(Element element) {
        String word = element.getAttribute(ATTR_WORD);
        return word.isEmpty() ? SpanishTreeNormalizer.EMPTY_LEAF_VALUE : word.trim();
    }

    /**
     * Recursively converts an XML element into a tree.
     *
     * @param node an element node
     * @return the tree, or {@code null} when a constituent element has no convertible
     *         element children
     */
    private Tree getTreeFromXML(Node node) {
        Element element = (Element) node;
        if (isWordNode(element)) {
            return buildWordNode(element);
        }
        if (isEllipticNode(element)) {
            return buildEllipticNode(element);
        }

        List<Tree> kids = new ArrayList<>();
        for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) {
            // Text, comment and other non-element nodes carry no tree structure.
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Tree kid = getTreeFromXML(child);
            if (kid == null) {
                System.err.printf("%s: Discarding empty tree (root: %s)%n", getClass().getName(), child.getNodeName());
            } else {
                kids.add(kid);
            }
        }
        return kids.isEmpty() ? null : buildConstituentNode(element, kids);
    }

    /**
     * Builds a preterminal over a single leaf for a word element. The leaf receives the
     * normalized word and the lemma attribute; the preterminal receives the normalized
     * part-of-speech tag.
     */
    private Tree buildWordNode(Node node) {
        Element element = (Element) node;
        String posTag = this.treeNormalizer.normalizeNonterminal(getPOS(element));
        // Empty string (not null) when the element has no lemma attribute.
        String lemma = element.getAttribute(ATTR_LEMMA);
        String word = this.treeNormalizer.normalizeTerminal(getWord(element));

        Tree leaf = this.treeFactory.newLeaf(word);
        if (leaf.label() instanceof HasWord) {
            ((HasWord) leaf.label()).setWord(word);
        }
        if ((leaf.label() instanceof HasLemma) && lemma != null) {
            ((HasLemma) leaf.label()).setLemma(lemma);
        }

        List<Tree> kids = new ArrayList<>();
        kids.add(leaf);
        Tree preterminal = this.treeFactory.newTreeNode(posTag, kids);
        if (preterminal.label() instanceof HasTag) {
            ((HasTag) preterminal.label()).setTag(posTag);
        }
        return preterminal;
    }

    /**
     * Builds a node named after the elliptic element, dominating a single leaf that
     * holds the normalizer's empty-leaf value.
     */
    private Tree buildEllipticNode(Node node) {
        String label = ((Element) node).getNodeName();
        Tree leaf = this.treeFactory.newLeaf(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
        if (leaf.label() instanceof HasWord) {
            ((HasWord) leaf.label()).setWord(SpanishTreeNormalizer.EMPTY_LEAF_VALUE);
        }
        List<Tree> kids = new ArrayList<>();
        kids.add(leaf);
        return this.treeFactory.newTreeNode(label, kids);
    }

    /**
     * Builds a constituent node over the given children. The label is the trimmed
     * element name; with detailed annotations enabled it is suffixed with
     * {@code -coord} when the coordination attribute is {@code yes}, otherwise with
     * {@code -<clausetype>} when that attribute is present.
     */
    private Tree buildConstituentNode(Node node, List<Tree> list) {
        Element element = (Element) node;
        String label = element.getNodeName().trim();
        if (this.detailedAnnotations) {
            if (element.getAttribute(ATTR_COORDINATING).equals("yes")) {
                label = label + "-coord";
            } else if (element.hasAttribute(ATTR_CLAUSE_TYPE)) {
                label = label + '-' + element.getAttribute(ATTR_CLAUSE_TYPE);
            }
        }
        return this.treeFactory.newTreeNode(this.treeNormalizer.normalizeNonterminal(label), list);
    }

    /**
     * Decides whether a tree should be printed: true when some preterminal has a tag
     * matching {@code pattern} and a word matching {@code pattern2}. A {@code null}
     * pattern matches everything.
     */
    private static boolean shouldPrintTree(Tree tree, Pattern pattern, Pattern pattern2) {
        for (Tree subtree : tree) {
            if (!subtree.isPreTerminal()) {
                continue;
            }
            String pos = subtree.label().value();
            String word = subtree.firstChild().label().value();
            boolean posMatches = pattern == null || pattern.matcher(pos).find();
            boolean wordMatches = pattern2 == null || pattern2.matcher(word).find();
            if (posMatches && wordMatches) {
                return true;
            }
        }
        return false;
    }

    /**
     * Renders a tree either with its own {@code toString()} or, when {@code z} is true,
     * as its leaf values each followed by a single space.
     */
    private static String toString(Tree tree, boolean z) {
        if (!z) {
            return tree.toString();
        }
        StringBuilder sb = new StringBuilder();
        List<Tree> leaves = tree.getLeaves();
        for (Tree leaf : leaves) {
            sb.append(leaf.label().value()).append(' ');
        }
        return sb.toString();
    }

    /**
     * Reads every tree from {@code treeReader} and prints the matching ones to standard
     * output as {@code <basename>-<sentenceId>\t<tree>}. A summary line goes to standard
     * error once the reader is exhausted.
     *
     * @param file the file the trees come from; only its name is used, for output
     * @param treeReader source of the trees; root labels are expected to be {@link CoreLabel}s
     * @param pattern part-of-speech filter, or {@code null} for no filter
     * @param pattern2 word filter, or {@code null} for no filter
     * @param z when true, print plain text instead of bracketed trees
     * @throws IOException if the reader fails
     */
    public static void process(File file, TreeReader treeReader, Pattern pattern, Pattern pattern2, boolean z) throws IOException {
        int numTrees = 0;
        int numPrinted = 0;

        // Strip the extension if there is one; a name without a dot is used whole
        // (substring(0, -1) would throw).
        String fileName = file.getName();
        int dot = fileName.lastIndexOf('.');
        String baseName = dot < 0 ? fileName : fileName.substring(0, dot);

        Tree tree;
        while ((tree = treeReader.readTree()) != null) {
            numTrees++;
            if (shouldPrintTree(tree, pattern, pattern2)) {
                numPrinted++;
                System.out.printf("%s-%s\t%s%n", baseName, (String) ((CoreLabel) tree.label()).get(CoreAnnotations.SentenceIDAnnotation.class), toString(tree, z));
            }
        }
        System.err.printf("%s: %d trees, %d matched and printed%n", fileName, numTrees, numPrinted);
    }

    /** Builds the command-line help text. */
    private static String usage() {
        StringBuilder sb = new StringBuilder();
        String nl = System.getProperty("line.separator");
        sb.append(String.format("Usage: java %s [OPTIONS] file(s)%n%n", SpanishXMLTreeReader.class.getName()));
        sb.append("Options:").append(nl);
        sb.append("   -help: Print this message").append(nl);
        sb.append("   -ner: Add NER-specific information to trees").append(nl);
        sb.append("   -detailedAnnotations: Retain detailed annotations on tree constituents (useful for making treebank for parser, etc.)").append(nl);
        sb.append("   -plain: Output corpus in plaintext rather than as trees").append(nl);
        sb.append("   -searchPos posRegex: Only print sentences which contain a token whose part of speech matches the given regular expression").append(nl);
        sb.append("   -searchWord wordRegex: Only print sentences which contain a token which matches the given regular expression").append(nl);
        return sb.toString();
    }

    /** Maps each command-line option name to the number of arguments it takes. */
    private static Map<String, Integer> argOptionDefs() {
        Map<String, Integer> defs = Generics.newHashMap();
        defs.put("help", 0);
        defs.put("ner", 0);
        defs.put("detailedAnnotations", 0);
        defs.put("plain", 0);
        defs.put("searchPos", 1);
        defs.put("searchWord", 1);
        return defs;
    }

    /**
     * Command-line entry point: reads each given XML file on a worker thread and prints
     * its trees (see {@link #usage()} for the options). Prints the usage text when no
     * arguments, no files, or {@code -help} are given.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        Properties options = StringUtils.argsToProperties(strArr, argOptionDefs());
        // The non-option arguments (the files) are stored under the empty key; the key is
        // absent when only options were supplied.
        String fileArgs = options.getProperty("");
        if (strArr.length < 1 || options.containsKey("help") || fileArgs == null) {
            log.info(usage());
            return;
        }

        Pattern posPattern = options.containsKey("searchPos") ? Pattern.compile(options.getProperty("searchPos")) : null;
        Pattern wordPattern = options.containsKey("searchWord") ? Pattern.compile(options.getProperty("searchWord")) : null;
        boolean plainPrint = PropertiesUtils.getBool(options, "plain", false);
        boolean ner = PropertiesUtils.getBool(options, "ner", false);
        boolean detailed = PropertiesUtils.getBool(options, "detailedAnnotations", false);

        List<File> files = new ArrayList<>();
        for (String path : fileArgs.split(" ")) {
            files.add(new File(path));
        }

        SpanishXMLTreeReaderFactory readerFactory = new SpanishXMLTreeReaderFactory(true, true, ner, detailed);
        ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        for (File file : files) {
            pool.execute(() -> {
                // try-with-resources closes both the file reader and the tree reader even
                // when processing fails part-way through.
                try (Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), CharEncoding.ISO_8859_1));
                     TreeReader treeReader = readerFactory.newTreeReader(file.getPath(), in)) {
                    process(file, treeReader, posPattern, wordPattern, plainPrint);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            });
        }

        pool.shutdown();
        try {
            pool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeInterruptedException(e);
        }
    }
}
