package de.metanome.backend.result_postprocessing.helper;

import de.metanome.algorithm_integration.input.InputIterationException;
import de.metanome.algorithm_integration.input.RelationalInput;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.util.packed.PackedInts;

/* loaded from: input_file:de/metanome/backend/result_postprocessing/helper/ColumnInformation.class */
/**
 * Holds per-column metadata (name, index, bit set) and, optionally, statistics
 * derived from the column's data: inferred type, histogram, distinct/null/row
 * counts and the average value length.
 *
 * <p>Data-dependent statistics are only computed when requested through the
 * five-argument constructor; otherwise they keep their defaults
 * ({@link ColumnType#STRING_COLUMN}, zero counts, no histogram).
 */
public class ColumnInformation {

    /** Size weight of a single value for each column type, used by {@link #getInformationContent(long)}. */
    private static final Map<ColumnType, Integer> contentSizes = buildContentSizes();

    /** Lower-case literals that are treated as a boolean value. */
    private static final Set<String> BOOLEAN_LITERALS = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "0", "false", "f", "no", "n",
                    "1", "true", "t", "yes", "y")));

    /** One or more digits, without sign. */
    private static final Pattern INTEGER_PATTERN = Pattern.compile("\\d+");

    /** Optionally signed number that contains a decimal point and/or an exponent. */
    private static final Pattern FLOAT_PATTERN =
            Pattern.compile("[+-]?(?=\\d*[.eE])(?=\\.?\\d)\\d*\\.?\\d*(?:[eE][+-]?\\d+)?");

    /** {@link SimpleDateFormat} patterns tried, in order, by {@link #isDateValue(String)}. */
    private static final String[] DATE_PATTERNS = {
        "M/dd/yyyy",
        "dd.M.yyyy",
        "M/dd/yyyy hh:mm:ss a",
        "dd.M.yyyy hh:mm:ss a",
        "dd.MMM.yyyy",
        "dd-MMM-yyyy"
    };

    private final String columnName;
    private final int columnIndex;
    private final BitSet bitSet;
    private ColumnType columnType = ColumnType.STRING_COLUMN;
    private long distinctValuesCount = 0L;
    private long nullValuesCount = 0L;
    private long rowCount = 0L;
    private Histogram histogram = null;
    private float averageValueLength = 0.0f;

    /** Value types a column (or a single cell) can be classified as. */
    public enum ColumnType {
        INTEGER_COLUMN,
        FLOAT_COLUMN,
        DATE_COLUMN,
        BOOLEAN_COLUMN,
        STRING_COLUMN
    }

    /**
     * Creates column information without reading any data; all data-dependent
     * statistics keep their default values.
     *
     * @param columnName  name of the column
     * @param columnIndex index of the column inside an input row
     * @param bitSet      bit set associated with the column
     * @throws InputIterationException declared for parity with the full constructor
     */
    public ColumnInformation(String columnName, int columnIndex, BitSet bitSet)
            throws InputIterationException {
        this(columnName, columnIndex, bitSet, null, false);
    }

    /**
     * Creates column information and, if requested, computes the data-dependent
     * statistics by iterating over {@code relationalInput}.
     *
     * @param columnName        name of the column
     * @param columnIndex       index of the column inside an input row
     * @param bitSet            bit set associated with the column
     * @param relationalInput   rows to analyse; only read when {@code computeStatistics}
     *                          is {@code true}, in which case it must not be {@code null}
     * @param computeStatistics whether to compute the data-dependent statistics
     * @throws InputIterationException if iterating over the input fails
     */
    public ColumnInformation(String columnName, int columnIndex, BitSet bitSet,
            RelationalInput relationalInput, boolean computeStatistics)
            throws InputIterationException {
        this.columnName = columnName;
        this.columnIndex = columnIndex;
        this.bitSet = bitSet;
        if (computeStatistics) {
            computeDataDependentStatistics(relationalInput);
        }
    }

    /** Builds the immutable per-type size table. */
    private static Map<ColumnType, Integer> buildContentSizes() {
        Map<ColumnType, Integer> sizes = new EnumMap<ColumnType, Integer>(ColumnType.class);
        sizes.put(ColumnType.BOOLEAN_COLUMN, 1);
        sizes.put(ColumnType.DATE_COLUMN, 4);
        sizes.put(ColumnType.FLOAT_COLUMN, 4);
        sizes.put(ColumnType.INTEGER_COLUMN, 4);
        sizes.put(ColumnType.STRING_COLUMN, 8);
        return Collections.unmodifiableMap(sizes);
    }

    /**
     * Computes all statistics that require the column's data: histogram and type
     * first, then the distinct count, null count and average length that are
     * derived from them.
     *
     * @param relationalInput rows to analyse
     * @throws InputIterationException if iterating over the input fails
     */
    protected void computeDataDependentStatistics(RelationalInput relationalInput)
            throws InputIterationException {
        createHistogramAndDetermineType(relationalInput);
        computeDistinctValuesCount();
        computeNullValuesCount();
        computeAverageLength();
    }

    /**
     * Consumes the input, filling a fresh histogram, counting the rows and
     * deriving the column type from the types of the individual non-null cells.
     *
     * @param relationalInput rows to analyse
     * @throws InputIterationException if iterating over the input fails
     */
    protected void createHistogramAndDetermineType(RelationalInput relationalInput)
            throws InputIterationException {
        this.histogram = new Histogram();
        this.rowCount = 0L;
        Map<ColumnType, Integer> typeCounts = new EnumMap<ColumnType, Integer>(ColumnType.class);
        while (relationalInput.hasNext()) {
            String value = relationalInput.next().get(this.columnIndex);
            if (value != null) {
                // Null cells carry no type information and do not vote.
                ColumnType cellType = getCellType(value);
                Integer seen = typeCounts.get(cellType);
                typeCounts.put(cellType, seen == null ? 1 : seen + 1);
            }
            // Every cell, including null, is handed to the histogram and counted as a row.
            this.histogram.addValue(value);
            this.rowCount++;
        }
        this.columnType = determineColumnType(typeCounts);
    }

    /**
     * Classifies a single cell value. The checks run in a fixed order (boolean,
     * integer, float, date); the first match wins and everything else is a string.
     * As a consequence {@code "0"} and {@code "1"} are classified as boolean.
     *
     * @param value cell value; must not be {@code null}
     * @return the type of the value
     */
    protected ColumnType getCellType(String value) {
        if (isBooleanValue(value)) {
            return ColumnType.BOOLEAN_COLUMN;
        }
        if (isIntegerValue(value)) {
            return ColumnType.INTEGER_COLUMN;
        }
        if (isFloatValue(value)) {
            return ColumnType.FLOAT_COLUMN;
        }
        if (isDateValue(value)) {
            return ColumnType.DATE_COLUMN;
        }
        return ColumnType.STRING_COLUMN;
    }

    /**
     * Picks the type with the strictly highest count. When several types share the
     * highest count, the one visited first by the map's iteration wins; with no
     * positive count at all the result is {@link ColumnType#STRING_COLUMN}.
     */
    private ColumnType determineColumnType(Map<ColumnType, Integer> typeCounts) {
        ColumnType mostFrequent = ColumnType.STRING_COLUMN;
        int highestCount = 0;
        for (Map.Entry<ColumnType, Integer> entry : typeCounts.entrySet()) {
            int count = entry.getValue().intValue();
            if (count > highestCount) {
                mostFrequent = entry.getKey();
                highestCount = count;
            }
        }
        return mostFrequent;
    }

    /** Sets the distinct value count to the number of histogram keys; no-op without a histogram. */
    protected void computeDistinctValuesCount() {
        if (this.histogram != null) {
            this.distinctValuesCount = this.histogram.getHistogramData().size();
        }
    }

    /** Copies the null count from the histogram; no-op without a histogram. */
    protected void computeNullValuesCount() {
        if (this.histogram != null) {
            this.nullValuesCount = this.histogram.getNullCount().longValue();
        }
    }

    /**
     * Checks whether the value is one of the accepted boolean literals
     * (0/1, false/true, f/t, no/yes, n/y), ignoring case.
     *
     * @param value cell value; must not be {@code null}
     * @return {@code true} if the value is a boolean literal
     */
    protected boolean isBooleanValue(String value) {
        // Locale.ROOT keeps the case folding independent of the JVM's default locale.
        return BOOLEAN_LITERALS.contains(value.toLowerCase(Locale.ROOT));
    }

    /**
     * Checks whether the value consists only of digits.
     *
     * <p>NOTE(review): signed values such as {@code "-5"} are not matched here (and
     * do not match the float pattern either), so they end up as strings unless a
     * later check accepts them. Confirm whether that is intended.
     *
     * @param value cell value; must not be {@code null}
     * @return {@code true} if the whole value is a run of digits
     */
    protected boolean isIntegerValue(String value) {
        return INTEGER_PATTERN.matcher(value).matches();
    }

    /**
     * Checks whether the value is an optionally signed number containing a decimal
     * point and/or an exponent.
     *
     * @param value cell value; must not be {@code null}
     * @return {@code true} if the whole value matches the float pattern
     */
    protected boolean isFloatValue(String value) {
        return FLOAT_PATTERN.matcher(value).matches();
    }

    /**
     * Checks whether the value can be parsed with any of the supported date patterns.
     *
     * @param value cell value
     * @return {@code true} if at least one pattern accepts the value
     */
    protected boolean isDateValue(String value) {
        for (String pattern : DATE_PATTERNS) {
            if (isDateValid(value, pattern)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the value parses as a date with the given pattern, using
     * non-lenient parsing.
     *
     * <p>{@link SimpleDateFormat#parse(String)} is not required to consume the whole
     * text, so a value that only starts with a matching date is accepted as well.
     * The formatter is created with the default locale, which affects patterns with
     * textual months ({@code MMM}).
     *
     * @param value   text to parse
     * @param pattern {@link SimpleDateFormat} pattern
     * @return {@code true} if parsing succeeds
     */
    protected boolean isDateValid(String value, String pattern) {
        // SimpleDateFormat is not thread-safe, hence a fresh instance per call.
        SimpleDateFormat format = new SimpleDateFormat(pattern);
        format.setLenient(false);
        try {
            format.parse(value);
            return true;
        } catch (ParseException ignored) {
            // A parse failure simply means the value is not a date in this pattern.
            return false;
        }
    }

    /**
     * Computes the average value length for string columns as the total length of
     * all histogram values (weighted by their occurrence count) divided by the row
     * count. Non-string columns, a missing histogram and an empty column all
     * yield {@code 0}.
     */
    protected void computeAverageLength() {
        if (this.columnType != ColumnType.STRING_COLUMN
                || this.histogram == null
                || this.rowCount == 0L) {
            // Avoids a NullPointerException without histogram and 0/0 = NaN without rows.
            this.averageValueLength = 0.0f;
            return;
        }
        long totalLength = 0L;
        for (Map.Entry<String, Long> entry : this.histogram.getHistogramData().entrySet()) {
            totalLength += entry.getKey().length() * entry.getValue().longValue();
        }
        this.averageValueLength = ((float) totalLength) / ((float) this.rowCount);
    }

    /**
     * @return {@code true} if the number of distinct values equals the number of rows
     */
    public boolean isUniqueColumn() {
        return getDistinctValuesCount() == getRowCount();
    }

    /**
     * @return distinct values divided by rows, or {@code 1.0} when there are no rows
     */
    public double getUniquenessRate() {
        if (this.rowCount == 0L) {
            return 1.0d;
        }
        // Cast first: long / long would truncate the ratio to 0 or 1.
        return ((double) this.distinctValuesCount) / this.rowCount;
    }

    /**
     * @return null values divided by rows, or {@code 0.0} when there are no rows
     */
    public double getNullRate() {
        if (this.rowCount == 0L) {
            return 0.0d;
        }
        // Cast first: long / long would truncate the ratio to 0 or 1.
        return ((double) this.nullValuesCount) / this.rowCount;
    }

    /**
     * Estimates the information content of this column for the given number of
     * values: the per-value size of the column type (multiplied by the average
     * value length for string columns) times {@code numberOfRows}.
     *
     * @param numberOfRows number of values to account for
     * @return the estimated content size, truncated to a {@code long}
     */
    public long getInformationContent(long numberOfRows) {
        float sizePerValue = contentSizes.get(this.columnType).intValue();
        if (this.columnType == ColumnType.STRING_COLUMN) {
            sizePerValue *= this.averageValueLength;
        }
        return (long) (sizePerValue * ((float) numberOfRows));
    }

    /** @return the column name */
    public String getColumnName() {
        return this.columnName;
    }

    /** @return the number of distinct values, {@code 0} if statistics were not computed */
    public long getDistinctValuesCount() {
        return this.distinctValuesCount;
    }

    /** @return the number of null values, {@code 0} if statistics were not computed */
    public long getNullValuesCount() {
        return this.nullValuesCount;
    }

    /** @return the inferred column type, {@link ColumnType#STRING_COLUMN} by default */
    public ColumnType getColumnType() {
        return this.columnType;
    }

    /** @return the value histogram, or {@code null} if statistics were not computed */
    public Histogram getHistogram() {
        return this.histogram;
    }

    /** @return the number of rows read, {@code 0} if statistics were not computed */
    public long getRowCount() {
        return this.rowCount;
    }

    /** @return the average value length; {@code 0} for non-string columns */
    public float getAverageValueLength() {
        return this.averageValueLength;
    }

    /** @return the bit set passed at construction (not copied) */
    public BitSet getBitSet() {
        return this.bitSet;
    }

    /** @return the index of the column inside an input row */
    public int getColumnIndex() {
        return this.columnIndex;
    }
}
