package edu.stanford.nlp.quoteattribution.Sieves.training;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.paragraphs.ParagraphAnnotator;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator;
import edu.stanford.nlp.quoteattribution.ChapterAnnotator;
import edu.stanford.nlp.quoteattribution.ExtractQuotesClassifier;
import edu.stanford.nlp.quoteattribution.ExtractQuotesUtil;
import edu.stanford.nlp.quoteattribution.Person;
import edu.stanford.nlp.quoteattribution.QuoteAttributionUtils;
import edu.stanford.nlp.quoteattribution.Sieves.Sieve;
import edu.stanford.nlp.quoteattribution.XMLToAnnotation;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/quoteattribution/Sieves/training/SupervisedSieveTraining.class */
public class SupervisedSieveTraining {
    /** Sieve for the document most recently handed to {@link #featurize}; it is replaced on every call. */
    private static Sieve sieve;

    /** Token strings whose presence between a mention and a quote is recorded as a "punctuationPresence:" feature. */
    public static final Set<String> punctuation = new HashSet<>(Arrays.asList(",", ".", "\"", "\n"));

    /** Lemmas that produce "prevWordPunct:" / "nextWordPunct:" features for the tokens adjacent to a mention. */
    public static final Set<String> punctuationForFeatures = new HashSet<>(Arrays.asList(",", ".", "!", "?"));

    /* loaded from: input_file:edu/stanford/nlp/quoteattribution/Sieves/training/SupervisedSieveTraining$FeaturesData.class */
    public static class FeaturesData {
        /** Feature vectors produced by {@code featurize}, one datum per candidate mention. */
        public GeneralDataset<String, String> dataset;
        /** Quote index mapped to the (first, last) datum indices recorded for that quote. */
        public Map<Integer, Pair<Integer, Integer>> mapQuoteToDataRange;
        /** Datum index mapped to the mention the datum was built from. */
        public Map<Integer, Sieve.MentionData> mapDatumToMention;

        /**
         * Bundles the three results of featurization.
         *
         * @param map quote index to datum range
         * @param map2 datum index to mention
         * @param generalDataset the feature dataset
         */
        public FeaturesData(Map<Integer, Pair<Integer, Integer>> map, Map<Integer, Sieve.MentionData> map2, GeneralDataset<String, String> generalDataset) {
            dataset = generalDataset;
            mapDatumToMention = map2;
            mapQuoteToDataRange = map;
        }
    }

    /* loaded from: input_file:edu/stanford/nlp/quoteattribution/Sieves/training/SupervisedSieveTraining$SieveData.class */
    public static class SieveData {
        /** Document to featurize. */
        Annotation doc;
        /** Name string mapped to the characters (persons) known under that name. */
        Map<String, List<Person>> characterMap;
        /** Pronoun coreference map, passed through unchanged to the {@code Sieve} constructor. */
        Map<Integer, String> pronounCorefMap;
        /** Animacy word list, passed through unchanged to the {@code Sieve} constructor. */
        Set<String> animacyList;

        /**
         * Groups the inputs needed to construct a {@code Sieve}.
         *
         * @param annotation the document
         * @param map the character map
         * @param map2 the pronoun coreference map
         * @param set the animacy list
         */
        public SieveData(Annotation annotation, Map<String, List<Person>> map, Map<Integer, String> map2, Set<String> set) {
            animacyList = set;
            pronounCorefMap = map2;
            characterMap = map;
            doc = annotation;
        }
    }

    /**
     * Finds the index of the first token of the paragraph that contains the given sentence.
     *
     * @param coreMap a sentence carrying paragraph-index, token-begin and sentence-index annotations
     * @param list all sentences of the document, indexed by sentence index
     * @return the token-begin value of the earliest consecutive sentence sharing the paragraph index
     */
    private static int getParagraphBeginToken(CoreMap coreMap, List<CoreMap> list) {
        final int paragraph = coreMap.get(CoreAnnotations.ParagraphIndexAnnotation.class);
        int beginToken = coreMap.get(CoreAnnotations.TokenBeginAnnotation.class);
        int sentenceIdx = coreMap.get(CoreAnnotations.SentenceIndexAnnotation.class) - 1;
        // Walk backwards while the preceding sentences still belong to the same paragraph.
        while (sentenceIdx >= 0) {
            CoreMap earlier = list.get(sentenceIdx);
            int earlierParagraph = earlier.get(CoreAnnotations.ParagraphIndexAnnotation.class);
            if (earlierParagraph != paragraph) {
                break;
            }
            beginToken = earlier.get(CoreAnnotations.TokenBeginAnnotation.class);
            sentenceIdx--;
        }
        return beginToken;
    }

    /**
     * Finds the index of the last token of the paragraph that contains the given sentence.
     *
     * @param coreMap a sentence carrying paragraph-index, token-end and sentence-index annotations
     * @param list all sentences of the document, indexed by sentence index
     * @return the token-end value minus one of the latest consecutive sentence sharing the paragraph index
     */
    private static int getParagraphEndToken(CoreMap coreMap, List<CoreMap> list) {
        final int paragraph = coreMap.get(CoreAnnotations.ParagraphIndexAnnotation.class);
        int endToken = coreMap.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
        int sentenceIdx = coreMap.get(CoreAnnotations.SentenceIndexAnnotation.class);
        // Walk forwards (starting at the sentence itself) while still inside the same paragraph.
        while (sentenceIdx < list.size()) {
            CoreMap later = list.get(sentenceIdx);
            int laterParagraph = later.get(CoreAnnotations.ParagraphIndexAnnotation.class);
            if (laterParagraph != paragraph) {
                break;
            }
            endToken = later.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
            sentenceIdx++;
        }
        return endToken;
    }

    /**
     * Groups the document's quotes by the paragraph index of the sentence each quote begins in.
     *
     * @param annotation document carrying quotation and sentence annotations
     * @return paragraph index mapped to the quotes starting in that paragraph, in document order
     */
    private static Map<Integer, List<CoreMap>> getQuotesInParagraph(Annotation annotation) {
        List<CoreMap> quotes = annotation.get(CoreAnnotations.QuotationsAnnotation.class);
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        Map<Integer, List<CoreMap>> quotesByParagraph = new HashMap<>();
        for (CoreMap quote : quotes) {
            int firstSentenceIdx = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
            CoreMap firstSentence = sentences.get(firstSentenceIdx);
            Integer paragraph = firstSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
            quotesByParagraph.computeIfAbsent(paragraph, key -> new ArrayList<>()).add(quote);
        }
        return quotesByParagraph;
    }

    /**
     * Subtracts a list of excluded token ranges from an enclosing range and returns what is left.
     *
     * <p>The exclusions are consumed in list order: for each one, the gap between the start of the
     * still-unprocessed range and the token before the exclusion is kept (if non-empty), and the
     * unprocessed range is advanced to the token after the exclusion.
     *
     * <p>The end-of-range test compares values with {@code equals}. The previous {@code ==}
     * compared boxed {@code Integer} references, so it only matched for cached small values; and
     * when it did match, the whole unprocessed range (exclusion included) was appended afterwards.
     * An exclusion that ends exactly at the end of the range now leaves nothing behind.
     *
     * @param pair the enclosing (begin, end) range
     * @param list ranges to remove; presumably sorted and non-overlapping — verify against callers
     * @return the leftover ranges, in order
     */
    private static List<Pair<Integer, Integer>> getRangeExclusion(Pair<Integer, Integer> pair, List<Pair<Integer, Integer>> list) {
        List<Pair<Integer, Integer>> leftoverRanges = new ArrayList<>();
        Pair<Integer, Integer> remaining = pair;
        for (Pair<Integer, Integer> excluded : list) {
            Pair<Integer, Integer> gap = new Pair<>(remaining.first, excluded.first - 1);
            if (gap.second - gap.first >= 0) {
                leftoverRanges.add(gap);
            }
            if (remaining.second.equals(excluded.second)) {
                // The exclusion reaches the end of the range: nothing can remain after it.
                return leftoverRanges;
            }
            remaining = new Pair<>(excluded.second + 1, remaining.second);
        }
        // NOTE(review): a single-token tail (first == second) is dropped here although single-token
        // gaps are kept above — confirm whether that asymmetry is intended.
        if (remaining.first < remaining.second) {
            leftoverRanges.add(remaining);
        }
        return leftoverRanges;
    }

    /**
     * Builds one feature vector per (quote, candidate mention) pair of a document.
     *
     * <p>For every quote, candidate mentions are collected from a window before the quote and a
     * window after it. Each candidate whose span does not overlap the quote becomes one datum
     * holding distance, paragraph, neighbouring-token and character-name features.
     *
     * <p>Fixes relative to the previous version of this method:
     * <ul>
     *   <li>the empty-speaker check compares string content instead of references;</li>
     *   <li>the search for the mention's paragraph to the left stops once the paragraph is found
     *       (it previously had no exit on success);</li>
     *   <li>"isNotMention" datums are registered in the datum-to-mention map under their own
     *       dataset index, like the other labels (the key used to be one too large).</li>
     * </ul>
     *
     * <p>As a side effect the static {@code sieve} field is replaced by a sieve for this document.
     *
     * @param sieveData document and resources used to construct the sieve
     * @param list gold information, one entry per quote in document order; only read when {@code z} is true
     * @param z true when building training data: datums are labelled "isMention" / "isNotMention"
     *          from the gold mention span and quotes with an empty gold speaker are skipped;
     *          false labels every datum "none"
     * @return the dataset, the datum range of each processed quote, and the mention behind each datum
     * @throws RuntimeException if {@code z} is true and the gold list size differs from the number of
     *         quotes, or if a quote cannot be found among the quotes of its own paragraph
     */
    public static FeaturesData featurize(SieveData sieveData, List<XMLToAnnotation.GoldQuoteInfo> list, boolean z) {
        final List<XMLToAnnotation.GoldQuoteInfo> goldList = list;
        final boolean isTraining = z;

        Annotation doc = sieveData.doc;
        sieve = new Sieve(doc, sieveData.characterMap, sieveData.pronounCorefMap, sieveData.animacyList);
        List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
        List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
        Map<Integer, List<CoreMap>> paragraphToQuotes = getQuotesInParagraph(doc);
        RVFDataset<String, String> dataset = new RVFDataset<>();
        Map<Integer, Pair<Integer, Integer>> mapQuoteToDataRange = new HashMap<>();
        Map<Integer, Sieve.MentionData> mapDatumToMention = new HashMap<>();

        if (isTraining && goldList.size() != quotes.size()) {
            throw new RuntimeException("Gold Quote List size doesn't match quote list size!");
        }

        for (int quoteIdx = 0; quoteIdx < quotes.size(); quoteIdx++) {
            int firstDatumIdx = dataset.size();
            CoreMap quote = quotes.get(quoteIdx);
            XMLToAnnotation.GoldQuoteInfo gold = null;
            if (isTraining) {
                gold = goldList.get(quoteIdx);
                // Compare content, not references: a speaker string read at runtime is not the "" literal.
                if ("".equals(gold.speaker)) {
                    continue;
                }
            }

            int quoteFirstSentenceIdx = quote.get(CoreAnnotations.SentenceBeginAnnotation.class);
            CoreMap quoteFirstSentence = sentences.get(quoteFirstSentenceIdx);
            Pair<Integer, Integer> quoteRun = new Pair<>(quote.get(CoreAnnotations.TokenBeginAnnotation.class), quote.get(CoreAnnotations.TokenEndAnnotation.class));
            int quoteParagraphIdx = quoteFirstSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);

            // Backward window: ends at the token before the quote. Its start only moves when a
            // sentence of the directly preceding paragraph is seen; sentences of the quote's own
            // paragraph are stepped over without changing it.
            int backwardEnd = quoteRun.first - 1;
            int backwardBegin = quoteRun.first - 1;
            for (int sentenceIdx = quoteFirstSentenceIdx; sentenceIdx >= 0; sentenceIdx--) {
                CoreMap sentence = sentences.get(sentenceIdx);
                int sentenceParagraph = sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
                if (sentenceParagraph != quoteParagraphIdx) {
                    if (sentenceParagraph != quoteParagraphIdx - 1) {
                        break;
                    }
                    backwardBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
                }
            }
            List<Sieve.MentionData> backwardMentions = new ArrayList<>();
            if (backwardBegin > -1 && backwardEnd > -1) {
                backwardMentions = eliminateDuplicates(sieve.findClosestMentionsInSpanBackward(new Pair<>(backwardBegin, backwardEnd)));
            }

            // Forward window: from the token after the quote to the end of the quote's paragraph.
            int forwardBegin = quoteRun.second + 1;
            int forwardEnd = quoteRun.second + 1;
            int quoteLastSentenceIdx = quote.get(CoreAnnotations.SentenceEndAnnotation.class);
            for (int sentenceIdx = quoteLastSentenceIdx; sentenceIdx < sentences.size(); sentenceIdx++) {
                CoreMap sentence = sentences.get(sentenceIdx);
                int sentenceParagraph = sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
                if (sentenceParagraph != quoteParagraphIdx) {
                    break;
                }
                forwardEnd = sentence.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
            }
            List<Sieve.MentionData> forwardMentions = new ArrayList<>();
            if (forwardBegin < tokens.size() && forwardEnd < tokens.size()) {
                forwardMentions = sieve.findClosestMentionsInSpanForward(new Pair<>(forwardBegin, forwardEnd));
            }

            List<Sieve.MentionData> candidates = new ArrayList<>();
            candidates.addAll(backwardMentions);
            candidates.addAll(forwardMentions);

            int rankedDistance = 1;
            int numBackwards = backwardMentions.size();
            for (Sieve.MentionData mention : candidates) {
                ClassicCounter<String> features = new ClassicCounter<>();

                // Token distance between mention and quote; try "mention left of quote" first.
                boolean isLeft = true;
                int wordDistance = quoteRun.first - mention.end;
                if (wordDistance < 0) {
                    isLeft = false;
                    wordDistance = mention.begin - quoteRun.second;
                }
                if (wordDistance < 0) {
                    // Neither strictly before nor strictly after the quote: no datum for this mention.
                    continue;
                }
                features.setCount("wordDistance", wordDistance);

                List<CoreLabel> tokensBetween = isLeft
                        ? tokens.subList(mention.end + 1, quoteRun.first)
                        : tokens.subList(quoteRun.second + 1, mention.begin);
                for (CoreLabel token : tokensBetween) {
                    if (punctuation.contains(token.word())) {
                        features.setCount("punctuationPresence:" + token.word(), 1.0d);
                    }
                }

                features.setCount("rankedDistance", rankedDistance);
                rankedDistance++;
                // NOTE(review): the rank restarts when it *equals* the number of backward mentions,
                // which is not the same as "after the last backward mention" — confirm intent.
                // Left unchanged so feature values stay the same.
                if (rankedDistance == numBackwards) {
                    rankedDistance = 1;
                }

                // Locate the paragraph containing the mention, relative to the quote's paragraph.
                int mentionParagraphIdx = -1;
                CoreMap mentionParagraph = null;
                int quoteParagraphBegin = getParagraphBeginToken(quoteFirstSentence, sentences);
                int quoteParagraphEnd = getParagraphEndToken(quoteFirstSentence, sentences);
                boolean mentionInQuoteParagraph = quoteParagraphBegin <= mention.begin && mention.end <= quoteParagraphEnd;
                if (isLeft) {
                    if (mentionInQuoteParagraph) {
                        features.setCount("leftParagraphDistance", 0.0d);
                        mentionParagraphIdx = quoteParagraphIdx;
                        mentionParagraph = quoteFirstSentence;
                    } else {
                        int paragraphDistance = 1;
                        int prevParagraph = quoteParagraphIdx - 1;
                        CoreMap currSentence = quoteFirstSentence;
                        int currSentenceIdx = currSentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
                        while (prevParagraph >= 0) {
                            // Step back to a sentence that belongs to the paragraph being examined.
                            while (currSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class).intValue() != prevParagraph) {
                                currSentenceIdx--;
                                currSentence = sentences.get(currSentenceIdx);
                            }
                            int prevParagraphBegin = getParagraphBeginToken(currSentence, sentences);
                            int prevParagraphEnd = getParagraphEndToken(currSentence, sentences);
                            if (prevParagraphBegin <= mention.begin && mention.end <= prevParagraphEnd) {
                                mentionParagraphIdx = prevParagraph;
                                mentionParagraph = currSentence;
                                features.setCount("leftParagraphDistance", paragraphDistance);
                                if (paragraphDistance % 2 == 0) {
                                    features.setCount("leftParagraphDistanceEven", 1.0d);
                                }
                                // Found it: stop searching (without this the loop never terminates).
                                break;
                            }
                            paragraphDistance++;
                            prevParagraph--;
                        }
                    }
                } else {
                    if (mentionInQuoteParagraph) {
                        features.setCount("rightParagraphDistance", 0.0d);
                        mentionParagraph = quoteFirstSentence;
                        mentionParagraphIdx = quoteParagraphIdx;
                    } else {
                        int paragraphDistance = 1;
                        int nextParagraph = quoteParagraphIdx + 1;
                        CoreMap currSentence = quoteFirstSentence;
                        int currSentenceIdx = currSentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
                        while (currSentenceIdx < sentences.size()) {
                            while (currSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class).intValue() != nextParagraph) {
                                currSentenceIdx++;
                                currSentence = sentences.get(currSentenceIdx);
                            }
                            int nextParagraphBegin = getParagraphBeginToken(currSentence, sentences);
                            int nextParagraphEnd = getParagraphEndToken(currSentence, sentences);
                            if (nextParagraphBegin <= mention.begin && mention.end <= nextParagraphEnd) {
                                // NOTE(review): mentionParagraphIdx is not set on this path, so the quote
                                // lookup below uses -1 and finds no quotes — confirm whether intended.
                                mentionParagraph = currSentence;
                                features.setCount("rightParagraphDistance", paragraphDistance);
                                break;
                            }
                            paragraphDistance++;
                            nextParagraph++;
                        }
                    }
                }

                // Features of the mention's paragraph, only when it differs from the quote's paragraph.
                if (mentionParagraph != null) {
                    int mentionParagraphBegin = getParagraphBeginToken(mentionParagraph, sentences);
                    int mentionParagraphEnd = getParagraphEndToken(mentionParagraph, sentences);
                    if (mentionParagraphBegin != quoteParagraphBegin || mentionParagraphEnd != quoteParagraphEnd) {
                        List<CoreMap> quotesInMentionParagraph = paragraphToQuotes.getOrDefault(mentionParagraphIdx, new ArrayList<>());
                        Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesInMentionParagraph =
                                sieve.scanForNames(new Pair<>(mentionParagraphBegin, mentionParagraphEnd));
                        features.setCount("quotesInMentionParagraph", quotesInMentionParagraph.size());
                        features.setCount("wordsInMentionParagraph", (mentionParagraphEnd - mentionParagraphBegin) + 1);
                        features.setCount("namesInMentionParagraph", namesInMentionParagraph.first.size());
                        for (int nameIdx = 0; nameIdx < namesInMentionParagraph.second.size(); nameIdx++) {
                            if (ExtractQuotesUtil.rangeContains(new Pair<>(mention.begin, mention.end), namesInMentionParagraph.second.get(nameIdx))) {
                                features.setCount("orderInParagraph", nameIdx);
                            }
                        }
                        if (quotesInMentionParagraph.size() == 1) {
                            CoreMap onlyQuote = quotesInMentionParagraph.get(0);
                            int onlyQuoteBegin = onlyQuote.get(CoreAnnotations.TokenBeginAnnotation.class);
                            int onlyQuoteLast = onlyQuote.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
                            // The single quote spans the entire paragraph.
                            if (onlyQuoteBegin == mentionParagraphBegin && onlyQuoteLast == mentionParagraphEnd) {
                                features.setCount("mentionParagraphIsInConversation", 1.0d);
                            } else {
                                features.setCount("mentionParagraphIsInConversation", -1.0d);
                            }
                        }
                        for (CoreMap paragraphQuote : quotesInMentionParagraph) {
                            int paragraphQuoteBegin = paragraphQuote.get(CoreAnnotations.TokenBeginAnnotation.class);
                            int paragraphQuoteLast = paragraphQuote.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
                            if (ExtractQuotesUtil.rangeContains(new Pair<>(paragraphQuoteBegin, paragraphQuoteLast), new Pair<>(mention.begin, mention.end))) {
                                features.setCount("mentionInQuote", 1.0d);
                            }
                        }
                        if (features.getCount("mentionInQuote") != 1.0d) {
                            features.setCount("mentionNotInQuote", 1.0d);
                        }
                    }
                }

                // Tokens directly adjacent to the mention.
                if (mention.begin > 0) {
                    CoreLabel prevToken = tokens.get(mention.begin - 1);
                    features.setCount("prevWordType:" + prevToken.tag(), 1.0d);
                    if (punctuationForFeatures.contains(prevToken.lemma())) {
                        features.setCount("prevWordPunct:" + prevToken.lemma(), 1.0d);
                    }
                }
                if (mention.end + 1 < tokens.size()) {
                    CoreLabel nextToken = tokens.get(mention.end + 1);
                    features.setCount("nextWordType:" + nextToken.tag(), 1.0d);
                    if (punctuationForFeatures.contains(nextToken.lemma())) {
                        features.setCount("nextWordPunct:" + nextToken.lemma(), 1.0d);
                    }
                }

                // Features of the quote and of the quote's paragraph.
                List<CoreMap> quotesInQuoteParagraph = paragraphToQuotes.get(quoteParagraphIdx);
                features.setCount("QuotesInQuoteParagraph", quotesInQuoteParagraph.size());
                features.setCount("WordsInQuoteParagraph", (quoteParagraphEnd - quoteParagraphBegin) + 1);
                features.setCount("NamesInQuoteParagraph", sieve.scanForNames(new Pair<>(quoteParagraphBegin, quoteParagraphEnd)).first.size());
                int quoteTokenBegin = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
                int quoteTokenEnd = quote.get(CoreAnnotations.TokenEndAnnotation.class);
                features.setCount("quoteLength", (quoteTokenEnd - quoteTokenBegin) + 1);
                for (int position = 0; position < quotesInQuoteParagraph.size(); position++) {
                    if (quotesInQuoteParagraph.get(position).equals(quote)) {
                        features.setCount("quotePosition", position + 1);
                    }
                }
                if (features.getCount("quotePosition") == 0.0d) {
                    throw new RuntimeException("Check this (equality not working)");
                }

                Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesInQuote = sieve.scanForNames(quoteRun);
                for (String nameInQuote : namesInQuote.first) {
                    features.setCount("charactersInQuote:" + sieveData.characterMap.get(nameInQuote).get(0).name, 1.0d);
                }

                // The quote makes up its whole paragraph.
                // NOTE(review): token-end is compared without the "- 1" used for paragraph ends elsewhere — confirm.
                if (quoteTokenBegin == quoteParagraphBegin && quoteTokenEnd == quoteParagraphEnd) {
                    features.setCount("isImplicitSpeaker", 1.0d);
                } else {
                    features.setCount("isImplicitSpeaker", -1.0d);
                }

                // Features that need the mention resolved to a known character.
                if (mention.type.equals("name")) {
                    List<Person> matches = sieveData.characterMap.get(sieve.tokenRangeToString(new Pair<>(mention.begin, mention.end)));
                    Person character = null;
                    if (matches != null) {
                        character = matches.get(0);
                    } else {
                        // Fall back to the first name found inside the mention span.
                        Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesInMention =
                                sieve.scanForNames(new Pair<>(mention.begin, mention.end));
                        if (namesInMention.first.size() != 0) {
                            String firstName = namesInMention.first.get(0);
                            if (firstName != null && sieveData.characterMap.containsKey(firstName)) {
                                character = sieveData.characterMap.get(firstName).get(0);
                            }
                        }
                    }
                    if (character != null) {
                        for (String nameInQuote : namesInQuote.first) {
                            if (character.aliases.contains(nameInQuote)) {
                                features.setCount("nameInQuote", 1.0d);
                            }
                        }
                        if (quoteParagraphIdx > 0) {
                            List<CoreMap> quotesInPrevParagraph = paragraphToQuotes.getOrDefault(quoteParagraphIdx - 1, new ArrayList<>());
                            List<Pair<Integer, Integer>> prevQuoteRuns = new ArrayList<>();
                            for (CoreMap prevQuote : quotesInPrevParagraph) {
                                Pair<Integer, Integer> prevQuoteRun = new Pair<>(prevQuote.get(CoreAnnotations.TokenBeginAnnotation.class), prevQuote.get(CoreAnnotations.TokenEndAnnotation.class));
                                prevQuoteRuns.add(prevQuoteRun);
                                for (String prevQuoteName : sieve.scanForNames(prevQuoteRun).first) {
                                    if (character.aliases.contains(prevQuoteName)) {
                                        features.setCount("nameInPrevParagraphQuote", 1.0d);
                                    }
                                }
                            }

                            // Nearest earlier sentence that belongs to the preceding paragraph.
                            CoreMap prevParagraphSentence = null;
                            int searchIdx = quoteFirstSentence.get(CoreAnnotations.SentenceIndexAnnotation.class) - 1;
                            while (searchIdx >= 0) {
                                CoreMap sentence = sentences.get(searchIdx);
                                int sentenceParagraph = sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
                                if (sentenceParagraph == quoteParagraphIdx - 1) {
                                    prevParagraphSentence = sentence;
                                    break;
                                }
                                searchIdx--;
                            }

                            // Scan the parts of the preceding paragraph that lie outside its quotes.
                            Pair<Integer, Integer> prevParagraphRun = new Pair<>(getParagraphBeginToken(prevParagraphSentence, sentences), getParagraphEndToken(prevParagraphSentence, sentences));
                            for (Pair<Integer, Integer> nonQuoteRun : getRangeExclusion(prevParagraphRun, prevQuoteRuns)) {
                                for (String nonQuoteName : sieve.scanForNames(nonQuoteRun).first) {
                                    if (character.aliases.contains(nonQuoteName)) {
                                        features.setCount("nameInPrevParagraphNonQuote", 1.0d);
                                    }
                                }
                            }
                        }
                    }
                }

                // Label the datum and register it under the index it is about to receive.
                String label;
                if (!isTraining) {
                    label = "none";
                } else if (QuoteAttributionUtils.rangeContains(new Pair<>(Integer.valueOf(gold.mentionStartTokenIndex), Integer.valueOf(gold.mentionEndTokenIndex)), new Pair<>(mention.begin, mention.end))) {
                    label = "isMention";
                } else {
                    label = "isNotMention";
                }
                RVFDatum<String, String> datum = new RVFDatum<>(features, label);
                datum.setID(Integer.toString(dataset.size()));
                // Must happen before add() so the key equals the datum's own index for every label.
                mapDatumToMention.put(dataset.size(), mention);
                dataset.add(datum);
            }
            mapQuoteToDataRange.put(quoteIdx, new Pair<>(firstDatumIdx, dataset.size() - 1));
        }
        return new FeaturesData(mapQuoteToDataRange, mapDatumToMention, dataset);
    }

    /**
     * Filters a mention list so that each distinct mention text is kept only on its first
     * occurrence; mentions whose type equals "Pronoun" are always kept.
     *
     * @param list mentions in priority order
     * @return a new list with the surviving mentions, in their original order
     */
    private static List<Sieve.MentionData> eliminateDuplicates(List<Sieve.MentionData> list) {
        List<Sieve.MentionData> kept = new ArrayList<>();
        Set<String> seenTexts = new HashSet<>();
        for (Sieve.MentionData candidate : list) {
            // add() returns true only the first time this text is encountered.
            boolean firstOccurrence = seenTexts.add(candidate.text);
            if (firstOccurrence || candidate.type.equals("Pronoun")) {
                kept.add(candidate);
            }
        }
        return kept;
    }

    /**
     * Serializes a classifier to a file using Java object serialization.
     *
     * <p>The stream is managed by try-with-resources, so the file handle is released even when
     * writing fails. I/O failures (including a path that cannot be opened) are reported on
     * standard error via the stack trace and are not rethrown.
     *
     * @param str path of the file to write
     * @param classifier the classifier to serialize
     */
    public static void outputModel(String str, Classifier<String, String> classifier) {
        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(str))) {
            out.writeObject(classifier);
            out.flush();
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both former cases.
            e.printStackTrace();
        }
    }

    /**
     * Trains a classifier on the gold-annotated document and writes it to disk.
     *
     * <p>Reads the "charactersPath", "modelPath" and "booknlpCoref" properties.
     *
     * @param data the annotated document together with its gold quote list
     * @param properties configuration holding the resource and output paths
     */
    public static void train(XMLToAnnotation.Data data, Properties properties) {
        Map<String, List<Person>> characterMap = QuoteAttributionUtils.readPersonMap(properties.getProperty("charactersPath"));
        String modelPath = properties.getProperty("modelPath");

        Map<Integer, String> pronounCorefMap = QuoteAttributionUtils.setupCoref(properties.getProperty("booknlpCoref"), characterMap, data.doc);
        Set<String> animacyList = QuoteAttributionUtils.readAnimacyList(QuoteAttributionAnnotator.ANIMACY_WORD_LIST);
        SieveData sieveData = new SieveData(data.doc, characterMap, pronounCorefMap, animacyList);

        FeaturesData trainingFeatures = featurize(sieveData, data.goldList, true);
        Classifier<String, String> classifier = new ExtractQuotesClassifier(trainingFeatures.dataset).getClassifier();
        outputModel(modelPath, classifier);
    }

    /**
     * Command-line entry point: loads a properties file and the XML-annotated document it names,
     * runs the paragraph, quote-attribution and chapter annotators over the document, then trains
     * and stores the model.
     *
     * @param strArr optional arguments: [0] base directory, [1] properties file name; built-in
     *               defaults are used for any that are missing
     * @throws Exception if loading or annotation fails
     */
    public static void main(String[] strArr) throws Exception {
        String baseDir = "/home/mjfang/action_grammars/";
        if (strArr.length >= 1) {
            baseDir = strArr[0];
        }
        String propsFileName = "1PPDevUncollapsed.props";
        if (strArr.length >= 2) {
            propsFileName = strArr[1];
        }
        System.out.println("Base directory: " + baseDir);

        Properties fileProps = StringUtils.propFileToProperties(baseDir + "ExtractQuotesXMLScripts/" + propsFileName);
        XMLToAnnotation.Data data = XMLToAnnotation.readXMLFormat(fileProps.getProperty("file"));

        Properties paragraphProps = new Properties();
        paragraphProps.setProperty("paragraphBreak", "one");
        ParagraphAnnotator paragraphAnnotator = new ParagraphAnnotator(paragraphProps, false);
        paragraphAnnotator.annotate(data.doc);

        // Forward only the settings the attribution annotator and the trainer read.
        Properties trainingProps = new Properties();
        for (String key : new String[] {"charactersPath", "booknlpCoref", "modelPath"}) {
            trainingProps.setProperty(key, fileProps.getProperty(key));
        }

        QuoteAttributionAnnotator attributionAnnotator = new QuoteAttributionAnnotator(trainingProps);
        attributionAnnotator.annotate(data.doc);
        ChapterAnnotator chapterAnnotator = new ChapterAnnotator();
        chapterAnnotator.annotate(data.doc);

        train(data, trainingProps);
    }
}
