package de.tuberlin.cis.bilke.dumas.string.tokens;

import de.tuberlin.cis.bilke.dumas.DumasProperties;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

/* loaded from: input_file:de/tuberlin/cis/bilke/dumas/string/tokens/SimpleTokenizer.class */
/**
 * Tokenizer that splits a string into maximal runs of letters, maximal runs
 * of digits and (optionally) single punctuation characters, and interns every
 * distinct token text so that equal texts share one {@link Token} instance.
 *
 * <p>Whitespace always separates tokens and is never emitted. Any character
 * that is neither whitespace, a letter nor a digit is treated as punctuation.
 * Classification works on Unicode code points, so characters outside the Basic
 * Multilingual Plane are handled as one unit instead of two surrogate halves.
 *
 * <p>Token ids are handed out sequentially per tokenizer instance. Id 1 is
 * taken by the null token created in the constructor; interned texts receive
 * the following ids in order of first appearance.
 *
 * <p>No synchronization is performed. Note that {@link #DEFAULT_TOKENIZER} is
 * a shared instance whose flags can be changed through the setters.
 */
public class SimpleTokenizer implements Tokenizer {
    /** Shared instance that ignores both punctuation and case. */
    public static final SimpleTokenizer DEFAULT_TOKENIZER = new SimpleTokenizer(true, true);

    /** When {@code true}, punctuation characters are skipped instead of emitted. */
    protected boolean _ignorePunctuation;

    /** When {@code true}, token texts are lower-cased before being interned. */
    protected boolean _ignoreCase;

    /** Token standing in for a {@code null} string; created once in the constructor. */
    protected Token _nullToken = null;

    /** Last id handed out; incremented before each new token is created. */
    private int _nextId = 0;

    /** Interned tokens keyed by their (possibly lower-cased) text. */
    private final Map<String, Token> _tokMap = new TreeMap<String, Token>();

    /**
     * Creates a tokenizer with its own intern table and null token.
     *
     * @param ignorePunctuation whether punctuation characters are dropped
     * @param ignoreCase whether token texts are lower-cased before interning
     */
    public SimpleTokenizer(boolean ignorePunctuation, boolean ignoreCase) {
        this._ignorePunctuation = ignorePunctuation;
        this._ignoreCase = ignoreCase;
        initializeNullToken();
    }

    /** Creates the null token, consuming the next free id. */
    private void initializeNullToken() {
        this._nextId++;
        this._nullToken = new Token(this._nextId, null);
    }

    /**
     * Sets whether punctuation characters are dropped.
     *
     * @param ignorePunctuation {@code true} to skip punctuation
     */
    public void setIgnorePunctuation(boolean ignorePunctuation) {
        this._ignorePunctuation = ignorePunctuation;
    }

    /**
     * Sets whether token texts are lower-cased before interning. Tokens that
     * were interned earlier stay in the table unchanged.
     *
     * @param ignoreCase {@code true} to fold case
     */
    public void setIgnoreCase(boolean ignoreCase) {
        this._ignoreCase = ignoreCase;
    }

    /**
     * Returns a short description containing both flags.
     *
     * @return {@code "[SimpleTokenizer <ignorePunctuation>;<ignoreCase>]"}
     */
    @Override
    public String toString() {
        return "[SimpleTokenizer " + this._ignorePunctuation + ";" + this._ignoreCase + "]";
    }

    /**
     * Splits {@code str} into interned tokens.
     *
     * @param str the text to tokenize; may be {@code null}
     * @return the tokens in order of appearance. For a {@code null} input the
     *     result holds the null token if {@link #intern(String)} supplies one
     *     and is empty otherwise.
     */
    @Override
    public Token[] tokenize(String str) {
        if (str == null) {
            Token nullToken = internSomething(str);
            return nullToken == null ? new Token[0] : new Token[] {nullToken};
        }
        List<Token> tokens = new ArrayList<Token>();
        final int length = str.length();
        int pos = 0;
        while (pos < length) {
            // Read a whole code point so surrogate pairs are classified as one character.
            int codePoint = str.codePointAt(pos);
            if (Character.isWhitespace(codePoint)) {
                pos += Character.charCount(codePoint);
            } else if (Character.isLetter(codePoint)) {
                int end = endOfRun(str, pos, true);
                tokens.add(internSomething(str.substring(pos, end)));
                pos = end;
            } else if (Character.isDigit(codePoint)) {
                int end = endOfRun(str, pos, false);
                tokens.add(internSomething(str.substring(pos, end)));
                pos = end;
            } else {
                // Punctuation: one token per character, or nothing when ignored.
                int end = pos + Character.charCount(codePoint);
                if (!this._ignorePunctuation) {
                    tokens.add(internSomething(str.substring(pos, end)));
                }
                pos = end;
            }
        }
        return tokens.toArray(new Token[tokens.size()]);
    }

    /**
     * Finds the end of the run of letters or digits beginning at {@code start}.
     *
     * @param str the text being scanned
     * @param start index of the first character of the run
     * @param letters {@code true} to scan letters, {@code false} to scan digits
     * @return index just past the last character belonging to the run
     */
    private static int endOfRun(String str, int start, boolean letters) {
        final int length = str.length();
        int pos = start;
        while (pos < length) {
            int codePoint = str.codePointAt(pos);
            boolean inRun = letters ? Character.isLetter(codePoint) : Character.isDigit(codePoint);
            if (!inRun) {
                break;
            }
            pos += Character.charCount(codePoint);
        }
        return pos;
    }

    /**
     * Tokenizes every string of the array and concatenates the results.
     *
     * @param strArr the texts to tokenize; the array itself must not be
     *     {@code null}, individual elements may be
     * @return all tokens, in array order and then in order of appearance
     */
    @Override
    public Token[] tokenize(String[] strArr) {
        List<Token> tokens = new ArrayList<Token>();
        for (String str : strArr) {
            for (Token token : tokenize(str)) {
                tokens.add(token);
            }
        }
        return tokens.toArray(new Token[tokens.size()]);
    }

    /**
     * Interns {@code str} after applying case folding when it is enabled.
     *
     * @param str the token text, or {@code null}
     * @return the result of {@link #intern(String)} for the normalized text
     */
    private Token internSomething(String str) {
        if (str == null || !this._ignoreCase) {
            return intern(str);
        }
        // Locale.ROOT keeps the folding independent of the JVM's default locale,
        // so the same text always maps to the same token.
        return intern(str.toLowerCase(Locale.ROOT));
    }

    /**
     * Returns the token registered for {@code str}, creating and registering
     * it under the next free id on first use. The text is used exactly as
     * given; no case folding is applied here.
     *
     * @param str the token text, or {@code null}
     * @return the interned token. For {@code null} this is the null token when
     *     {@code DumasProperties.nullTokens()} is {@code true}, otherwise
     *     {@code null}.
     */
    @Override
    public Token intern(String str) {
        if (str == null) {
            return DumasProperties.nullTokens() ? this._nullToken : null;
        }
        Token token = this._tokMap.get(str);
        if (token == null) {
            this._nextId++;
            token = new Token(this._nextId, str);
            this._tokMap.put(str, token);
        }
        return token;
    }

    /**
     * Delegates to {@link #tokenize(String[])}; the tokens are interned in the
     * same table as for any other call.
     *
     * @param strArr the texts to tokenize
     * @return the same result as {@link #tokenize(String[])}
     */
    @Override
    public Token[] tokenizeTemp(String[] strArr) {
        return tokenize(strArr);
    }
}
