package com.hankcs.hanlp.classification.classifiers;

import com.hankcs.hanlp.classification.corpus.Document;
import com.hankcs.hanlp.classification.corpus.IDataSet;
import com.hankcs.hanlp.classification.features.BaseFeatureData;
import com.hankcs.hanlp.classification.features.ChiSquareFeatureExtractor;
import com.hankcs.hanlp.classification.models.AbstractModel;
import com.hankcs.hanlp.classification.models.NaiveBayesModel;
import com.hankcs.hanlp.classification.utilities.io.ConsoleLogger;
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
import com.hankcs.hanlp.utility.MathUtility;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

/* loaded from: classes5.dex */
/**
 * Naive Bayes text classifier.
 *
 * <p>{@link #train(IDataSet)} selects features with a chi-square test, then stores log priors and
 * add-one (Laplace) smoothed log likelihoods in a {@link NaiveBayesModel}.
 * {@link #categorize(Document)} scores a document against every category using that model.
 */
public class NaiveBayesClassifier extends AbstractClassifier {

    /**
     * Model used for prediction; {@code null} until one is passed to the constructor or built by
     * {@link #train(IDataSet)}.
     *
     * <p>The identifier was assigned by the decompiler (renamed from 刻槒唱镧詴 because the original
     * name contains non-printable characters). It is kept as-is because the field is public.
     */
    public NaiveBayesModel f8071;

    /** Creates an untrained classifier; call {@link #train(IDataSet)} before predicting. */
    public NaiveBayesClassifier() {
        this(null);
    }

    /**
     * Creates a classifier backed by an existing model.
     *
     * @param naiveBayesModel the model to use, or {@code null} for an untrained classifier
     */
    public NaiveBayesClassifier(NaiveBayesModel naiveBayesModel) {
        this.f8071 = naiveBayesModel;
    }

    /**
     * Scores a document against every category of the model.
     *
     * <p>For each category the score starts at the category's log prior and adds, for every
     * feature of the document that the model knows, the feature's occurrence count multiplied by
     * its log likelihood in that category. Features absent from the model are ignored.
     *
     * @param document the document to score; its {@code tfMap} maps a feature id to an array
     *                 whose first element is read as the occurrence count
     * @return one score per category, indexed by category id
     * @throws IllegalStateException if no model has been trained or supplied
     */
    @Override // com.hankcs.hanlp.classification.classifiers.IClassifier
    public double[] categorize(Document document) throws IllegalArgumentException, IllegalStateException {
        NaiveBayesModel model = this.f8071;
        if (model == null) {
            // Same failure mode and message as predict(String) instead of a bare NullPointerException.
            throw new IllegalStateException("未训练模型！无法执行预测！");
        }
        double[] scores = new double[model.catalog.length];
        for (Map.Entry<Integer, Double> prior : model.logPriors.entrySet()) {
            Integer category = prior.getKey();
            double logProbability = prior.getValue();
            for (Map.Entry<Integer, int[]> term : document.tfMap.entrySet()) {
                Integer feature = term.getKey();
                if (!model.logLikelihoods.containsKey(feature)) {
                    continue; // feature was not selected during training
                }
                int occurrences = term.getValue()[0];
                logProbability += occurrences * model.logLikelihoods.get(feature).get(category);
            }
            scores[category] = logProbability;
        }
        // f8070 is an inherited flag not declared in this file; presumably it switches the raw
        // log scores to normalized probabilities - TODO confirm against AbstractClassifier.
        if (this.f8070) {
            MathUtility.normalizeExp(scores);
        }
        return scores;
    }

    /** Returns the current model as its abstract type; {@code null} when untrained. */
    @Override // com.hankcs.hanlp.classification.classifiers.IClassifier
    public AbstractModel getModel() {
        return this.f8071;
    }

    /** Returns the current model with its concrete type; {@code null} when untrained. */
    public NaiveBayesModel getNaiveBayesModel() {
        return this.f8071;
    }

    /**
     * Segments the text with the model's tokenizer and predicts its category scores.
     *
     * @param str the raw text to classify
     * @return the result of the inherited {@code predict(Document)} for the segmented text
     * @throws IllegalStateException    if no model has been trained or supplied
     * @throws IllegalArgumentException if {@code str} is {@code null}
     */
    @Override // com.hankcs.hanlp.classification.classifiers.IClassifier
    public Map<String, Double> predict(String str) throws IllegalArgumentException, IllegalStateException {
        if (this.f8071 == null) {
            throw new IllegalStateException("未训练模型！无法执行预测！");
        }
        if (str == null) {
            throw new IllegalArgumentException("参数 text == null");
        }
        NaiveBayesModel model = this.f8071;
        return predict(new Document(model.wordIdTrie, model.tokenizer.segment(str)));
    }

    /**
     * Runs chi-square feature selection over the data set and compacts the feature statistics
     * so that only the selected features remain.
     *
     * <p>The selected features are renumbered 0..k-1 in the iteration order of the chi-square
     * result; {@code wordIdTrie} is rebuilt to map each selected word to its new index and
     * {@code featureCategoryJointCount} is replaced by the rows of the selected features.
     *
     * @param iDataSet the training data
     * @return the feature statistics restricted to the selected features
     */
    public BaseFeatureData selectFeatures(IDataSet iDataSet) {
        ChiSquareFeatureExtractor extractor = new ChiSquareFeatureExtractor();
        ConsoleLogger.logger.start("使用卡方检测选择特征中...");
        BaseFeatureData featureData = ChiSquareFeatureExtractor.extractBasicFeatureData(iDataSet);
        Map<Integer, Double> selectedFeatures = extractor.chi_square(featureData);

        int selectedCount = selectedFeatures.size();
        int originalCount = featureData.featureCategoryJointCount.length;
        // Only the outer dimension is allocated; each row is shared with the original table.
        int[][] selectedJointCount = new int[selectedCount][];
        featureData.wordIdTrie = new BinTrie<>();
        String[] wordIdArray = iDataSet.getLexicon().getWordIdArray();

        int newId = 0;
        for (Integer oldId : selectedFeatures.keySet()) {
            selectedJointCount[newId] = featureData.featureCategoryJointCount[oldId];
            featureData.wordIdTrie.put(wordIdArray[oldId], Integer.valueOf(newId));
            newId++;
        }

        // Divide as double: integer division would report 0% for any partial selection.
        double selectedPercent = (double) selectedCount / originalCount * 100.0d;
        ConsoleLogger.logger.finish(",选中特征数:%d / %d = %.2f%%\n", selectedCount, originalCount, selectedPercent);
        featureData.featureCategoryJointCount = selectedJointCount;
        return featureData;
    }

    /**
     * Trains a new model from the data set and installs it as the current model.
     *
     * <p>Steps: select features, compute the log prior of each category from its document count,
     * sum the selected-feature occurrences per category, then compute add-one smoothed log
     * likelihoods {@code log((count + 1) / (categoryTotal + featureCount))} for every
     * feature/category pair.
     *
     * @param iDataSet the training data
     */
    @Override // com.hankcs.hanlp.classification.classifiers.IClassifier
    public void train(IDataSet iDataSet) {
        ConsoleLogger.logger.out("原始数据集大小:%d\n", Integer.valueOf(iDataSet.size()));
        BaseFeatureData featureData = selectFeatures(iDataSet);

        NaiveBayesModel model = new NaiveBayesModel();
        this.f8071 = model;
        model.n = featureData.n;
        model.d = featureData.featureCategoryJointCount.length;
        model.c = featureData.categoryCounts.length;
        model.logPriors = new TreeMap<>();

        // Log prior of each category: log(documents in category / total documents).
        for (int category = 0; category < featureData.categoryCounts.length; category++) {
            double prior = (double) featureData.categoryCounts[category] / model.n;
            model.logPriors.put(Integer.valueOf(category), Double.valueOf(Math.log(prior)));
        }

        // Total occurrences of all selected features per category (smoothing denominator).
        Map<Integer, Double> featureOccurrencesInCategory = new TreeMap<>();
        for (Integer category : model.logPriors.keySet()) {
            double total = 0.0d;
            for (int feature = 0; feature < featureData.featureCategoryJointCount.length; feature++) {
                total += featureData.featureCategoryJointCount[feature][category];
            }
            featureOccurrencesInCategory.put(category, Double.valueOf(total));
        }

        // Add-one smoothed log likelihood of every feature in every category.
        for (Integer category : model.logPriors.keySet()) {
            double denominator = featureOccurrencesInCategory.get(category) + model.d;
            for (int feature = 0; feature < featureData.featureCategoryJointCount.length; feature++) {
                int count = featureData.featureCategoryJointCount[feature][category];
                double logLikelihood = Math.log((count + 1.0d) / denominator);
                Integer featureKey = Integer.valueOf(feature);
                if (!model.logLikelihoods.containsKey(featureKey)) {
                    model.logLikelihoods.put(featureKey, new TreeMap<Integer, Double>());
                }
                model.logLikelihoods.get(featureKey).put(category, Double.valueOf(logLikelihood));
            }
        }

        ConsoleLogger.logger.out("贝叶斯统计结束\n");
        model.catalog = iDataSet.getCatalog().toArray();
        model.tokenizer = iDataSet.getTokenizer();
        model.wordIdTrie = featureData.wordIdTrie;
    }
}
