package edu.stanford.nlp.ie;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.CoreEntityMention;
import edu.stanford.nlp.util.AcronymMatcher;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/* loaded from: input_file:edu/stanford/nlp/ie/KBPBasicSpanishCorefSystem.class */
/**
 * Groups entity mentions into clusters using string-similarity heuristics
 * (no entity linking) and picks one canonical mention per cluster.
 *
 * <p>The typical entry point is {@link #canonicalMentionMapFromEntityMentions(List)},
 * which clusters the mentions with {@link #clusterEntityMentions(List)} and then maps
 * every mention to the best mention of its cluster via
 * {@link #createCanonicalMentionMap(List)}.
 *
 * <p>NOTE(review): despite the class name, nothing in this class is specific to
 * Spanish; the corporate suffix list is English-language.
 */
public class KBPBasicSpanishCorefSystem {
    /** Entity type value treated as a person by the matching heuristics. */
    public static final String NER_PERSON = "PERSON";

    /** Entity type value treated as an organization by the matching heuristics. */
    public static final String NER_ORGANIZATION = "ORGANIZATION";

    /**
     * Lower-case trailing designators that {@link #stripCorporateTitles(String)} removes
     * from a name before token comparison. The set is unmodifiable.
     */
    public static final Set<String> CORPORATE_SUFFIXES = Collections.unmodifiableSet(buildCorporateSuffixes());

    /** Scores strictly below this are never considered the same entity. */
    private static final double MIN_PLAUSIBLE_SCORE = 0.34d;

    /** Person score above which a single-token mention may match the first/last token of a longer one. */
    private static final double PERSON_PARTIAL_NAME_SCORE = 0.49d;

    /** Person score above which two mentions are accepted as the same entity outright. */
    private static final double PERSON_MATCH_SCORE = 0.65d;

    /** Organization score above which two mentions are accepted as the same entity. */
    private static final double ORGANIZATION_MATCH_SCORE = 0.79d;

    /** Both tokens must be longer than this before prefix/suffix/edit-distance matching is tried. */
    private static final int MIN_FUZZY_TOKEN_LENGTH = 5;

    /** Builds the mutable backing set for {@link #CORPORATE_SUFFIXES}. */
    private static Set<String> buildCorporateSuffixes() {
        Set<String> suffixes = new HashSet<>();
        Collections.addAll(suffixes,
                "cic", "cio", "general partnership",
                "llp", "llp.", "limited liability partnership",
                "lp", "lp.", "limited partnership",
                "ltd", "ltd.", "plc", "plc.",
                "private company limited by guarantee", "unlimited company",
                "sole proprietorship", "sole trader",
                "na", "nt&sa", "federal credit union", "federal savings bank",
                "lllp", "lllp.", "llc", "llc.", "lc", "lc.",
                "co", "co.", "pllc", "pllc.", "corp", "corp.", "inc", "inc.",
                "pc", "p.c.", "dba",
                "corporation", "incorporated", "limited", "association", "company",
                "clib", "syndicate", "institute", "fund", "foundation", "club", "partners");
        return suffixes;
    }

    /**
     * Wraps each raw mention in a {@link CoreEntityMention} that has no owning document
     * ({@code null} is passed as the first constructor argument).
     *
     * @param list raw entity mentions
     * @return one wrapper per input mention, in the same order
     */
    public List<CoreEntityMention> wrapEntityMentions(List<CoreMap> list) {
        return list.stream()
                .map(mention -> new CoreEntityMention(null, mention))
                .collect(Collectors.toList());
    }

    /**
     * Removes a trailing corporate designator (see {@link #CORPORATE_SUFFIXES}) from a name.
     *
     * <p>The comparison is case-insensitive. A suffix only counts when it is a separate
     * trailing word, i.e. the character before it is not a letter or digit, so
     * "Barcelona" is not cut to "Barcelo" by the suffix "na". When several suffixes
     * match, the longest one is removed, which makes the result independent of set
     * iteration order. If removing the suffix would leave nothing, the input is
     * returned unchanged.
     *
     * @param str the name to clean
     * @return the name without its trailing designator (trimmed), or {@code str} itself
     *         when no designator applies
     */
    protected String stripCorporateTitles(String str) {
        String longestSuffix = null;
        for (String suffix : CORPORATE_SUFFIXES) {
            boolean longer = longestSuffix == null || suffix.length() > longestSuffix.length();
            if (longer && endsWithSuffixWord(str, suffix)) {
                longestSuffix = suffix;
            }
        }
        if (longestSuffix == null) {
            return str;
        }
        String stripped = str.substring(0, str.length() - longestSuffix.length()).trim();
        // A name that consists only of a designator ("Institute") must not collapse to "",
        // otherwise unrelated names would compare as equal empty tokens.
        return stripped.isEmpty() ? str : stripped;
    }

    /** Returns true when {@code text} ends (ignoring case) with {@code suffix} as a separate word. */
    private static boolean endsWithSuffixWord(String text, String suffix) {
        int start = text.length() - suffix.length();
        if (start <= 0 || !text.regionMatches(true, start, suffix, 0, suffix.length())) {
            return false;
        }
        return !Character.isLetterOrDigit(text.charAt(start - 1));
    }

    /**
     * Normalizes a string for comparison: drops every backslash, double quote and hyphen
     * and lower-cases the remaining characters one by one.
     *
     * @param str the text to normalize
     * @return the normalized text
     */
    public String noSpecialChars(String str) {
        StringBuilder cleaned = new StringBuilder(str.length());
        for (int k = 0; k < str.length(); k++) {
            char c = str.charAt(k);
            if (c != '\\' && c != '\"' && c != '-') {
                cleaned.append(Character.toLowerCase(c));
            }
        }
        return cleaned.toString();
    }

    /**
     * Tells whether {@code coreMap2} is a better canonical mention than {@code coreMap}:
     * its text is longer, or the texts have equal length and it starts at a smaller
     * character offset.
     *
     * @param coreMap  the current mention
     * @param coreMap2 the competing mention
     * @return {@code true} if the competing mention should replace the current one
     */
    public boolean moreCanonicalMention(CoreMap coreMap, CoreMap coreMap2) {
        int currentLength = coreMap.get(CoreAnnotations.TextAnnotation.class).length();
        int competingLength = coreMap2.get(CoreAnnotations.TextAnnotation.class).length();
        if (competingLength != currentLength) {
            return competingLength > currentLength;
        }
        // Offsets are only read for the tie-break, so mentions without offsets are
        // tolerated whenever the text lengths already decide.
        int currentBegin = coreMap.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int competingBegin = coreMap2.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        return competingBegin < currentBegin;
    }

    /**
     * Fuzzy first-name comparison: both names must be at least 5 characters long and
     * their {@code StringUtils.levenshteinDistance} must be below 3.
     *
     * @param str  first name (callers in this class pass it lower-cased)
     * @param str2 second name (callers in this class pass it lower-cased)
     * @return {@code true} if the names are considered the same first name
     */
    public boolean firstNameMatch(String str, String str2) {
        return Math.min(str.length(), str2.length()) >= 5 && StringUtils.levenshteinDistance(str, str2) < 3;
    }

    /**
     * Decides whether two mentions refer to the same entity using only their surface
     * strings and the entity type of the first mention.
     *
     * <p>Order of the rules:
     * <ol>
     *   <li>Persons with at least two tokens each and the same last token: same entity if
     *       the first tokens pass {@link #firstNameMatch}; different if both have exactly
     *       two tokens and the first names do not match.</li>
     *   <li>Otherwise the larger of the two directional
     *       {@link #approximateEntityMatchScore} values is compared against fixed
     *       thresholds, with an extra rule for persons that lets a single-token mention
     *       match the first or last token of a multi-token one.</li>
     * </ol>
     *
     * @param coreEntityMention  first mention; its entity type drives the rules
     * @param coreEntityMention2 second mention
     * @return {@code true} if the mentions are judged to be the same entity
     */
    protected boolean sameEntityWithoutLinking(CoreEntityMention coreEntityMention, CoreEntityMention coreEntityMention2) {
        String entityType = coreEntityMention.entityType();
        // Constant-first equals: null-safe and does not rely on string interning.
        boolean isPerson = NER_PERSON.equals(entityType);

        if (isPerson
                && coreEntityMention.tokens().size() >= 2
                && coreEntityMention2.tokens().size() >= 2
                && lastWord(coreEntityMention).equalsIgnoreCase(lastWord(coreEntityMention2))) {
            String firstName = firstWord(coreEntityMention).toLowerCase();
            String firstName2 = firstWord(coreEntityMention2).toLowerCase();
            if (firstNameMatch(firstName, firstName2)) {
                return true;
            }
            // Same surname, different first name, and nothing else to compare.
            if (coreEntityMention.tokens().size() == 2 && coreEntityMention2.tokens().size() == 2) {
                return false;
            }
        }

        String text = coreEntityMention.text();
        String text2 = coreEntityMention2.text();
        double score = Math.max(approximateEntityMatchScore(text, text2), approximateEntityMatchScore(text2, text));
        if (score == 1.0d) {
            return true;
        }
        if (score < MIN_PLAUSIBLE_SCORE) {
            return false;
        }
        if (isPerson && score > PERSON_PARTIAL_NAME_SCORE) {
            boolean bothLongerThanOneChar = Math.min(text.length(), text2.length()) > 1;
            if (bothLongerThanOneChar
                    && (singleTokenMatchesEdge(coreEntityMention, coreEntityMention2)
                        || singleTokenMatchesEdge(coreEntityMention2, coreEntityMention))) {
                return true;
            }
            if (score > PERSON_MATCH_SCORE) {
                return true;
            }
        }
        return NER_ORGANIZATION.equals(entityType) && score > ORGANIZATION_MATCH_SCORE;
    }

    /** Returns the word of the first token of the mention. */
    private static String firstWord(CoreEntityMention mention) {
        return mention.tokens().get(0).word();
    }

    /** Returns the word of the last token of the mention. */
    private static String lastWord(CoreEntityMention mention) {
        return mention.tokens().get(mention.tokens().size() - 1).word();
    }

    /**
     * Returns true when {@code single} has exactly one token, {@code multi} has more than
     * one, and that single token equals (ignoring case) the first or last token of {@code multi}.
     */
    private static boolean singleTokenMatchesEdge(CoreEntityMention single, CoreEntityMention multi) {
        if (single.tokens().size() != 1 || multi.tokens().size() <= 1) {
            return false;
        }
        String word = firstWord(single);
        return lastWord(multi).equalsIgnoreCase(word) || firstWord(multi).equalsIgnoreCase(word);
    }

    /** Returns true when the strings are equal ignoring case, before or after {@link #noSpecialChars}. */
    private boolean nearExactEntityMatch(String str, String str2) {
        return str.equalsIgnoreCase(str2) || noSpecialChars(str).equalsIgnoreCase(noSpecialChars(str2));
    }

    /**
     * Scores how well the tokens of {@code str} are covered by the tokens of {@code str2}.
     *
     * <p>Returns 1.0 for a near-exact match or when {@code AcronymMatcher.isAcronym}
     * accepts the two token arrays. Otherwise both names are stripped of corporate
     * suffixes, split on whitespace, and the result is the number of tokens of
     * {@code str} that match at least one not-yet-used token of {@code str2}, divided by
     * the larger token count. The score is not symmetric; callers that need symmetry
     * take the maximum of both directions.
     *
     * @param str  first name
     * @param str2 second name
     * @return a score between 0.0 and 1.0
     */
    public double approximateEntityMatchScore(String str, String str2) {
        if (nearExactEntityMatch(str, str2)) {
            return 1.0d;
        }
        String[] tokens = stripCorporateTitles(str).split("\\s+");
        String[] tokens2 = stripCorporateTitles(str2).split("\\s+");
        if (AcronymMatcher.isAcronym(tokens, tokens2)) {
            return 1.0d;
        }
        int longest = Math.max(tokens.length, tokens2.length);
        if (longest == 0) {
            // Whitespace-only input splits into zero tokens; nothing can match.
            return 0.0d;
        }

        // Normalize the second side once instead of once per token pair.
        String[] normalized2 = new String[tokens2.length];
        for (int j = 0; j < tokens2.length; j++) {
            normalized2[j] = noSpecialChars(tokens2[j]);
        }

        boolean[] used2 = new boolean[tokens2.length];
        int matches = 0;
        for (String token : tokens) {
            String normalized = noSpecialChars(token);
            boolean matched = false;
            // Deliberately no early exit: one token consumes every unused token it
            // matches on the other side, but counts as a single match.
            for (int j = 0; j < normalized2.length; j++) {
                if (!used2[j] && tokensApproximatelyMatch(normalized, normalized2[j])) {
                    used2[j] = true;
                    matched = true;
                }
            }
            if (matched) {
                matches++;
            }
        }
        // Floating-point division: an int/int quotient would truncate the score to 0 or 1.
        return (double) matches / longest;
    }

    /**
     * Returns true when two normalized tokens are equal ignoring case, or when both are
     * longer than {@link #MIN_FUZZY_TOKEN_LENGTH} and one is a prefix or suffix of the
     * other or their edit distance is at most 1.
     */
    private static boolean tokensApproximatelyMatch(String a, String b) {
        if (a.equalsIgnoreCase(b)) {
            return true;
        }
        if (Math.min(a.length(), b.length()) <= MIN_FUZZY_TOKEN_LENGTH) {
            return false;
        }
        return a.endsWith(b) || a.startsWith(b)
                || b.endsWith(a) || b.startsWith(a)
                || StringUtils.levenshteinDistance(b, a) <= 1;
    }

    /**
     * Greedily clusters mentions: each mention joins the first existing cluster that
     * contains a member accepted by {@link #sameEntityWithoutLinking}, or starts a new
     * cluster if there is none. Cluster order and member order follow input order.
     *
     * @param list raw entity mentions
     * @return the clusters, each as a list of the original mention objects
     */
    public List<List<CoreMap>> clusterEntityMentions(List<CoreMap> list) {
        List<List<CoreEntityMention>> clusters = new ArrayList<>();
        for (CoreEntityMention mention : wrapEntityMentions(list)) {
            List<CoreEntityMention> home = findCluster(clusters, mention);
            if (home == null) {
                home = new ArrayList<>();
                clusters.add(home);
            }
            home.add(mention);
        }

        List<List<CoreMap>> result = new ArrayList<>(clusters.size());
        for (List<CoreEntityMention> cluster : clusters) {
            List<CoreMap> unwrapped = cluster.stream()
                    .map(wrapped -> wrapped.coreMap())
                    .collect(Collectors.toList());
            result.add(unwrapped);
        }
        return result;
    }

    /** Returns the first cluster with a member matching {@code mention}, or {@code null} if none does. */
    private List<CoreEntityMention> findCluster(List<List<CoreEntityMention>> clusters, CoreEntityMention mention) {
        for (List<CoreEntityMention> cluster : clusters) {
            for (CoreEntityMention member : cluster) {
                if (sameEntityWithoutLinking(mention, member)) {
                    return cluster;
                }
            }
        }
        return null;
    }

    /**
     * Picks the canonical mention of a cluster: the one with the longest text, ties
     * broken by the smallest begin offset (see {@link #moreCanonicalMention}).
     *
     * @param list the mentions of one cluster
     * @return the best mention, or {@code null} if the list is empty
     */
    public CoreMap bestEntityMention(List<CoreMap> list) {
        CoreMap best = null;
        for (CoreMap candidate : list) {
            if (best == null || moreCanonicalMention(best, candidate)) {
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Maps every mention of every cluster to the best mention of its cluster.
     *
     * @param list the clusters
     * @return a map from mention to canonical mention
     */
    public Map<CoreMap, CoreMap> createCanonicalMentionMap(List<List<CoreMap>> list) {
        Map<CoreMap, CoreMap> canonicalByMention = new HashMap<>();
        for (List<CoreMap> cluster : list) {
            CoreMap canonical = bestEntityMention(cluster);
            for (CoreMap mention : cluster) {
                canonicalByMention.put(mention, canonical);
            }
        }
        return canonicalByMention;
    }

    /**
     * Clusters the mentions and maps each one to the canonical mention of its cluster.
     *
     * @param list raw entity mentions
     * @return a map from mention to canonical mention
     */
    public Map<CoreMap, CoreMap> canonicalMentionMapFromEntityMentions(List<CoreMap> list) {
        return createCanonicalMentionMap(clusterEntityMentions(list));
    }
}
