package edu.stanford.nlp.coref.statistical;

import edu.stanford.nlp.coref.CorefDocumentProcessor;
import edu.stanford.nlp.coref.CorefUtils;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.Pair;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;

/* loaded from: input_file:edu/stanford/nlp/coref/statistical/DatasetBuilder.class */
/**
 * Collects the labeled mention pairs of each processed document, optionally
 * sub-sampling them, and serializes the collected pairs when processing ends.
 *
 * <p>For every document the pairs returned by
 * {@link CorefUtils#getLabeledMentionPairs(Document)} are reduced in two steps:
 * <ol>
 *   <li>if the fraction of pairs labeled {@code true} is below
 *       {@code minClassImbalancedPerDocument}, randomly chosen {@code false}-labeled
 *       pairs are dropped until that fraction is (approximately) reached;</li>
 *   <li>while more than {@code maxExamplesPerDocument} pairs remain, all pairs that
 *       share a randomly chosen {@code second} element are dropped together.</li>
 * </ol>
 *
 * <p>All random choices are drawn from a single generator seeded with {@code 0}.
 */
public class DatasetBuilder implements CorefDocumentProcessor {
    /** Upper bound on the number of pairs kept for one document. */
    private final int maxExamplesPerDocument;
    /** Minimum fraction of {@code true}-labeled pairs enforced per document. */
    private final double minClassImbalancedPerDocument;
    /** Retained labeled pairs, keyed by the id passed to {@link #process(int, Document)}. */
    private final Map<Integer, Map<Pair<Integer, Integer>, Boolean>> mentionPairs;
    /** Fixed-seed generator used for every sampling decision in this class. */
    private final Random random;

    /**
     * Creates a builder that performs no sub-sampling: a minimum {@code true}-label
     * fraction of {@code 0} and a cap of {@link Integer#MAX_VALUE} pairs per document.
     */
    public DatasetBuilder() {
        this(0.0d, Integer.MAX_VALUE);
    }

    /**
     * Creates a builder with the given sub-sampling limits.
     *
     * @param minClassImbalancedPerDocument minimum fraction of {@code true}-labeled pairs
     *     to enforce per document by discarding {@code false}-labeled pairs
     * @param maxExamplesPerDocument maximum number of pairs to keep per document
     */
    public DatasetBuilder(double minClassImbalancedPerDocument, int maxExamplesPerDocument) {
        this.maxExamplesPerDocument = maxExamplesPerDocument;
        this.minClassImbalancedPerDocument = minClassImbalancedPerDocument;
        this.mentionPairs = new HashMap<>();
        this.random = new Random(0L);
    }

    /**
     * Extracts the labeled mention pairs of {@code document}, sub-samples them, and
     * stores the result under {@code id}. The map returned by
     * {@link CorefUtils#getLabeledMentionPairs(Document)} is modified in place.
     *
     * @param id key under which the document's pairs are stored
     * @param document document to extract labeled mention pairs from
     */
    @Override // edu.stanford.nlp.coref.CorefDocumentProcessor
    public void process(int id, Document document) {
        Map<Pair<Integer, Integer>, Boolean> labeledPairs = CorefUtils.getLabeledMentionPairs(document);
        downsampleNegatives(labeledPairs);
        capExamples(labeledPairs);
        this.mentionPairs.put(id, labeledPairs);
    }

    /**
     * Removes randomly chosen {@code false}-labeled pairs so that {@code true}-labeled
     * pairs make up roughly {@code minClassImbalancedPerDocument} of the map.
     */
    private void downsampleNegatives(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        long numPositive = labeledPairs.values().stream().filter(Boolean::booleanValue).count();
        List<Pair<Integer, Integer>> negatives = labeledPairs.entrySet().stream()
                .filter(entry -> !entry.getValue())
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());

        // 0/0 yields NaN for an empty map; NaN < x is false, so nothing is removed.
        float positiveRatio = ((float) numPositive) / ((float) (numPositive + negatives.size()));
        if (positiveRatio >= this.minClassImbalancedPerDocument) {
            return;
        }

        // Solve p / (p + n) = min for n. Clamp at 0 so a threshold above 1 cannot
        // produce a negative start index for the removal loop below.
        int numNegativesToKeep =
                Math.max(0, (int) ((numPositive / this.minClassImbalancedPerDocument) - numPositive));

        // Shuffle with the seeded generator so the selection is reproducible.
        Collections.shuffle(negatives, this.random);
        for (int i = numNegativesToKeep; i < negatives.size(); i++) {
            labeledPairs.remove(negatives.get(i));
        }
    }

    /**
     * Drops whole groups of pairs sharing the same {@code second} element, chosen at
     * random, until at most {@code maxExamplesPerDocument} pairs remain.
     */
    private void capExamples(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        // NOTE(review): pair.second is presumably the mention id and pair.first its
        // candidate antecedent id -- confirm against CorefUtils.getLabeledMentionPairs.
        Map<Integer, List<Integer>> firstsBySecond = new HashMap<>();
        for (Pair<Integer, Integer> pair : labeledPairs.keySet()) {
            firstsBySecond.computeIfAbsent(pair.second, key -> new ArrayList<>()).add(pair.first);
        }

        List<Integer> remainingSeconds = new ArrayList<>(firstsBySecond.keySet());
        // The emptiness guard avoids Random.nextInt(0), which would otherwise throw
        // when maxExamplesPerDocument is negative and every pair has been removed.
        while (labeledPairs.size() > this.maxExamplesPerDocument && !remainingSeconds.isEmpty()) {
            Integer second = remainingSeconds.remove(this.random.nextInt(remainingSeconds.size()));
            for (Integer first : firstsBySecond.get(second)) {
                labeledPairs.remove(new Pair<>(first, second));
            }
        }
    }

    /**
     * Serializes all collected mention pairs to {@code StatisticalCorefTrainer.datasetFile}.
     *
     * @throws Exception if writing the object to the file fails
     */
    @Override // edu.stanford.nlp.coref.CorefDocumentProcessor
    public void finish() throws Exception {
        IOUtils.writeObjectToFile(this.mentionPairs, StatisticalCorefTrainer.datasetFile);
    }
}
