package de.metanome.algorithms.cody.codycore;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.MultimapBuilder;
import com.univocity.parsers.csv.CsvFormat;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.ints.IntListIterator;
import it.unimi.dsi.fastutil.objects.Object2IntLinkedOpenHashMap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.stream.IntStream;
import lombok.NonNull;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/metanome/algorithms/cody/codycore/Preprocessor.class */
/**
 * Reads a CSV file and condenses it into per-column bitmaps of null positions.
 *
 * <p>Each input row is reduced to the list of column indexes whose cell equals the configured
 * null value. Rows with identical null patterns are deduplicated (their multiplicity is kept in
 * {@link #getRowCounts()}), and columns whose bitmaps are identical are merged (the original
 * column indexes of each merged column are kept in {@link #getColumnIndexToDuplicatesMapping()}).
 *
 * <p>Call {@link #run()} once before using any getter; before that the result fields are unset.
 */
public class Preprocessor {
    private static final Logger log = LoggerFactory.getLogger(Preprocessor.class);

    private final Configuration configuration;

    /** Null pattern of a row -> number of input rows showing that pattern, in first-seen order. */
    private final Object2IntLinkedOpenHashMap<IntList> rowDeduplicator;

    /** One bitmap per input column; bit r is set when distinct row r is null in that column. */
    private List<MutableRoaringBitmap> columnPlisMutable;

    private int nRowsDistinct;
    private List<ImmutableRoaringBitmap> columnPlis;
    private List<List<Integer>> columnIndexToDuplicatesMapping;
    private String[] columnIndexToNameMapping;
    private int nRows;
    private int[] rowCounts;

    /**
     * Creates a preprocessor for the input described by {@code configuration}.
     *
     * @param configuration source of the file path, CSV format and limits; must not be null
     * @throws NullPointerException if {@code configuration} is null
     */
    public Preprocessor(@NonNull Configuration configuration) {
        if (configuration == null) {
            throw new NullPointerException("configuration is marked non-null but is null");
        }
        this.configuration = configuration;
        this.rowDeduplicator = new Object2IntLinkedOpenHashMap<>();
        this.nRows = 0;
        this.nRowsDistinct = 0;
    }

    /**
     * Parses the configured file, deduplicates rows and columns and populates the result fields.
     *
     * <p>The first parsed row is either the header (stored as the column names) or, when the
     * configuration says there is no header, the first data row. An input without any row yields
     * empty results instead of failing.
     */
    public void run() {
        CsvParser parser = initializeParser();
        try {
            String[] firstRow = parser.parseNext();
            int nColumns = firstRow == null ? 0 : firstRow.length;

            List<MutableRoaringBitmap> plis = new ArrayList<>(nColumns);
            for (int i = 0; i < nColumns; i++) {
                plis.add(new MutableRoaringBitmap());
            }
            // Freeze the list once, after all per-column bitmaps exist.
            this.columnPlisMutable = ImmutableList.copyOf(plis);

            if (firstRow != null) {
                if (this.configuration.isNoHeader()) {
                    // Without a header the first row is data and must be counted like any other.
                    addRow(firstRow);
                } else {
                    this.columnIndexToNameMapping = firstRow;
                }
                String[] row;
                while ((row = parser.parseNext()) != null) {
                    addRow(row);
                }
            }
        } finally {
            // Release the underlying reader even when parsing or row handling fails.
            parser.stopParsing();
        }

        transformRows();
        transformColumns();
        log.info("Deduplicated {} rows to {}, {} columns to {}",
                this.nRows, this.nRowsDistinct, this.columnPlisMutable.size(), this.columnPlis.size());
    }

    /**
     * Counts one input row and records its null pattern.
     *
     * <p>A pattern seen before only has its multiplicity increased. A new pattern is assigned the
     * next distinct row id, which is then set in the bitmap of every column that is null in it.
     *
     * @param row the parsed cells of one row
     */
    private void addRow(String[] row) {
        this.nRows++;

        IntArrayList nullColumns = new IntArrayList();
        for (int column = 0; column < row.length; column++) {
            if (row[column].equals(this.configuration.getNullValue())) {
                nullColumns.add(column);
            }
        }

        if (this.rowDeduplicator.containsKey(nullColumns)) {
            this.rowDeduplicator.addTo(nullColumns, 1);
            return;
        }

        this.rowDeduplicator.put(nullColumns, 1);
        // Indexed primitive access avoids boxing every column index.
        for (int k = 0; k < nullColumns.size(); k++) {
            this.columnPlisMutable.get(nullColumns.getInt(k)).add(this.nRowsDistinct);
        }
        this.nRowsDistinct++;
    }

    /**
     * Opens the configured file and returns a CSV parser that has already begun parsing it.
     *
     * <p>Delimiter, quote character, skipped lines, row limit and (unless it is {@code -1}) the
     * column limit are taken from the configuration. Null and empty cells are both read as the
     * empty string. If the file cannot be opened the error is logged and the JVM exits with
     * status 1.
     *
     * @return a parser positioned before the first record
     */
    private CsvParser initializeParser() {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(new File(this.configuration.getPath())));
        } catch (IOException e) {
            log.error("Fatal error reading {}. Now exiting.", this.configuration.getPath(), e);
            System.exit(1);
        }

        CsvParserSettings settings = new CsvParserSettings();
        CsvFormat format = settings.getFormat();
        format.setDelimiter(this.configuration.getDelimiter());
        format.setQuote(this.configuration.getQuoteChar());
        settings.setNumberOfRowsToSkip(this.configuration.getSkipLines());
        settings.setNumberOfRecordsToRead(this.configuration.getRowLimit());
        settings.setNullValue("");
        settings.setEmptyValue("");

        int colLimit = this.configuration.getColLimit();
        if (colLimit != -1) {
            // Restrict parsing to the first colLimit columns.
            settings.selectIndexes(IntStream.range(0, colLimit).boxed().toArray(Integer[]::new));
        }

        CsvParser parser = new CsvParser(settings);
        parser.beginParsing(reader);
        return parser;
    }

    /**
     * Merges columns with identical bitmaps and publishes the distinct bitmaps together with, for
     * each of them, the list of original column indexes it stands for.
     */
    private void transformColumns() {
        ListMultimap<ImmutableRoaringBitmap, Integer> duplicateMapping = createDuplicateColumnMapping();
        this.columnPlis = ImmutableList.copyOf(duplicateMapping.keySet());

        List<List<Integer>> duplicates = new ArrayList<>(this.columnPlis.size());
        for (ImmutableRoaringBitmap pli : this.columnPlis) {
            duplicates.add(ImmutableList.copyOf(duplicateMapping.get(pli)));
        }
        this.columnIndexToDuplicatesMapping = ImmutableList.copyOf(duplicates);

        if (log.isDebugEnabled()) {
            log.debug("Duplicate columns: {}", duplicates);
            for (ImmutableRoaringBitmap pli : this.columnPlis) {
                log.debug("Column null values: {}/{} ", pli.getCardinality(), this.nRowsDistinct);
            }
        }
    }

    /**
     * Groups the original column indexes by their bitmap, keeping first-seen order of the bitmaps.
     *
     * @return distinct bitmap -> indexes of all columns having exactly that bitmap
     */
    private ListMultimap<ImmutableRoaringBitmap, Integer> createDuplicateColumnMapping() {
        ListMultimap<ImmutableRoaringBitmap, Integer> mapping =
                MultimapBuilder.linkedHashKeys().arrayListValues().build();
        for (int column = 0; column < this.columnPlisMutable.size(); column++) {
            mapping.put(this.columnPlisMutable.get(column).toImmutableRoaringBitmap(), column);
        }
        return mapping;
    }

    /**
     * Approximate variant of {@link #createDuplicateColumnMapping()}: a column joins the first
     * already-seen bitmap whose symmetric difference with it has a support of at most
     * {@code threshold}; otherwise it starts a new group under its own bitmap.
     *
     * @param threshold largest support of the differing rows for two columns to be merged
     * @return representative bitmap -> indexes of all columns assigned to it
     */
    private ListMultimap<ImmutableRoaringBitmap, Integer> createDuplicateColumnMapping(double threshold) {
        ListMultimap<ImmutableRoaringBitmap, Integer> mapping =
                MultimapBuilder.linkedHashKeys().arrayListValues().build();
        for (int column = 0; column < this.columnPlisMutable.size(); column++) {
            ImmutableRoaringBitmap candidate = this.columnPlisMutable.get(column).toImmutableRoaringBitmap();

            // Look for an existing group that is close enough; fall back to the column's own bitmap.
            ImmutableRoaringBitmap representative = candidate;
            for (ImmutableRoaringBitmap existing : mapping.keySet()) {
                if (calculateSupport(ImmutableRoaringBitmap.xor(existing, candidate)) <= threshold) {
                    representative = existing;
                    break;
                }
            }
            // The multimap is only modified after the key iteration has finished.
            mapping.put(representative, column);
        }
        return mapping;
    }

    /**
     * Computes the fraction of input rows covered by the given set of distinct rows.
     *
     * @param rows ids of distinct rows
     * @return sum of the multiplicities of the given distinct rows divided by the number of input rows
     */
    private double calculateSupport(ImmutableRoaringBitmap rows) {
        double covered = 0.0d;
        // A single iteration over the bitmap; an empty bitmap contributes nothing.
        for (int distinctRow : rows) {
            covered += this.rowCounts[distinctRow];
        }
        return covered / this.nRows;
    }

    /** Snapshots the multiplicity of every distinct row, indexed by distinct row id. */
    private void transformRows() {
        this.rowCounts = this.rowDeduplicator.values().toIntArray();
    }

    /**
     * @return the distinct column bitmaps after merging identical columns
     */
    public List<ImmutableRoaringBitmap> getColumnPlis() {
        return this.columnPlis;
    }

    /**
     * @return for each entry of {@link #getColumnPlis()}, the original column indexes it represents
     */
    public List<List<Integer>> getColumnIndexToDuplicatesMapping() {
        return this.columnIndexToDuplicatesMapping;
    }

    /**
     * @return the header row, or {@code null} when no header row was read
     */
    public String[] getColumnIndexToNameMapping() {
        return this.columnIndexToNameMapping;
    }

    /**
     * @return the number of data rows read
     */
    public int getNRows() {
        return this.nRows;
    }

    /**
     * @return the multiplicity of each distinct row, indexed by distinct row id
     */
    public int[] getRowCounts() {
        return this.rowCounts;
    }
}
