/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.mining.word;

import com.hankcs.hanlp.algorithm.MaxHeap;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.mining.word.TfIdf;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.summary.KeywordExtractor;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import com.hankcs.hanlp.utility.Predefine;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Keyword extractor that accumulates per-document term frequencies and ranks
 * words by the TF-IDF weights produced by {@link TfIdf}.
 *
 * <p>Documents are registered with one of the {@code add} overloads. The IDF
 * table is either loaded from a file via {@link #loadIdfFile(String)} or
 * derived from the registered documents by {@link #compute()}. Every
 * {@code add} call discards the current IDF table so that it is rebuilt on
 * the next computation.
 */
public class TfIdfCounter
extends KeywordExtractor {
    /** Whether {@link #preprocess(String)} applies the inherited {@code filter} step. */
    private final boolean filterStopWord;
    /** Document id -> term-frequency map as returned by {@code TfIdf.tf}. */
    private final Map<Object, Map<String, Double>> tfMap;
    /** Document id -> TF-IDF map; {@code null} until {@link #compute()} has run. */
    private Map<Object, Map<String, Double>> tfidfMap;
    /** Word -> IDF weight; {@code null} whenever it has to be (re)built. */
    private Map<String, Double> idf;

    /** Creates a counter using {@code StandardTokenizer.SEGMENT} with stop-word filtering enabled. */
    public TfIdfCounter() {
        this(true);
    }

    /**
     * Creates a counter using {@code StandardTokenizer.SEGMENT}.
     *
     * @param filterStopWord whether segmented text is passed through the inherited filter
     */
    public TfIdfCounter(boolean filterStopWord) {
        this(StandardTokenizer.SEGMENT, filterStopWord);
    }

    /**
     * Creates a counter with an explicit segmenter.
     *
     * @param defaultSegment segmenter handed to the superclass
     * @param filterStopWord whether segmented text is passed through the inherited filter
     */
    public TfIdfCounter(Segment defaultSegment, boolean filterStopWord) {
        super(defaultSegment);
        this.filterStopWord = filterStopWord;
        this.tfMap = new HashMap<Object, Map<String, Double>>();
    }

    /**
     * Creates a counter with an explicit segmenter and stop-word filtering enabled.
     *
     * @param defaultSegment segmenter handed to the superclass
     */
    public TfIdfCounter(Segment defaultSegment) {
        this(defaultSegment, true);
    }

    /**
     * Returns only the words of {@link #getKeywordsWithTfIdf(List, int)}, in the same order.
     *
     * @param termList already segmented terms
     * @param size     number of keywords requested
     * @return the keyword strings
     */
    @Override
    public List<String> getKeywords(List<Term> termList, int size) {
        List<Map.Entry<String, Double>> entryList = this.getKeywordsWithTfIdf(termList, size);
        List<String> keywords = new ArrayList<String>(entryList.size());
        for (Map.Entry<String, Double> entry : entryList) {
            keywords.add(entry.getKey());
        }
        return keywords;
    }

    /**
     * Segments (and optionally filters) {@code document}, then ranks its words.
     *
     * @param document raw text
     * @param size     number of keywords requested
     * @return word/weight entries as produced by {@link #getKeywordsWithTfIdf(List, int)}
     */
    public List<Map.Entry<String, Double>> getKeywordsWithTfIdf(String document, int size) {
        return this.getKeywordsWithTfIdf(this.preprocess(document), size);
    }

    /**
     * Ranks the words of {@code termList} against the current IDF table,
     * building that table from the registered documents first if it is missing.
     *
     * @param termList already segmented terms
     * @param size     number of keywords requested
     * @return word/weight entries selected by the internal max-heap
     */
    public List<Map.Entry<String, Double>> getKeywordsWithTfIdf(List<Term> termList, int size) {
        if (this.idf == null) {
            this.compute();
        }
        Map<String, Double> tfIdf = TfIdf.tfIdf(TfIdf.tf(TfIdfCounter.convert(termList)), this.idf);
        return this.topN(tfIdf, size);
    }

    /**
     * Registers a document under {@code id}, replacing any document with the same id.
     *
     * @param id       document identifier
     * @param termList already segmented terms of the document
     */
    public void add(Object id, List<Term> termList) {
        List<String> words = TfIdfCounter.convert(termList);
        Map<String, Double> tf = TfIdf.tf(words);
        this.tfMap.put(id, tf);
        // The corpus changed, so any existing IDF table is dropped and rebuilt lazily.
        this.idf = null;
    }

    /** Extracts the {@code word} field of every term, preserving order. */
    private static List<String> convert(List<Term> termList) {
        List<String> words = new ArrayList<String>(termList.size());
        for (Term term : termList) {
            words.add(term.word);
        }
        return words;
    }

    /**
     * Registers a document using the current number of documents as its id.
     *
     * @param termList already segmented terms of the document
     */
    public void add(List<Term> termList) {
        this.add((Object)this.tfMap.size(), termList);
    }

    /**
     * Segments (and optionally filters) {@code text} and registers it under {@code id}.
     *
     * @param id   document identifier
     * @param text raw document text
     */
    public void add(Object id, String text) {
        List<Term> termList = this.preprocess(text);
        this.add(id, termList);
    }

    /** Segments {@code text} and applies the inherited filter when stop-word filtering is on. */
    private List<Term> preprocess(String text) {
        List<Term> termList = this.defaultSegment.seg(text);
        if (this.filterStopWord) {
            this.filter(termList);
        }
        return termList;
    }

    /**
     * Registers raw text using the current number of documents as its id.
     *
     * @param text raw document text
     * @return the id assigned to the document
     */
    public int add(String text) {
        int id = this.tfMap.size();
        this.add((Object)id, text);
        return id;
    }

    /**
     * Loads an IDF table from a UTF-8 text file whose lines have the form
     * {@code word<space>value}. A leading byte-order mark on the first line is skipped.
     *
     * <p>The table is installed only after the whole file has been parsed, so a
     * failed load leaves the previous table untouched. The reader is always closed.
     *
     * @param idfPath path understood by {@code IOUtil.newInputStream}
     * @throws RuntimeException if the file cannot be read or a line is malformed;
     *                          the original exception is attached as the cause
     */
    public void loadIdfFile(String idfPath) {
        BufferedReader reader = null;
        try {
            Map<String, Double> loaded = new HashMap<String, Double>();
            reader = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(idfPath), "UTF-8"));
            boolean first = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (first) {
                    first = false;
                    // Strip a UTF-8 byte-order mark if the file starts with one.
                    if (!line.isEmpty() && line.charAt(0) == '\ufeff') {
                        line = line.substring(1);
                    }
                }
                String[] lineValue = line.split(" ");
                loaded.put(lineValue[0], Double.valueOf(lineValue[1]));
            }
            this.idf = loaded;
        }
        catch (Exception e) {
            Predefine.logger.warning("\u52a0\u8f7d" + idfPath + "\u5931\u8d25\uff0c" + e);
            throw new RuntimeException("\u8f7d\u5165\u53cd\u6587\u6863\u8bcd\u9891\u6587\u4ef6" + idfPath + "\u5931\u8d25", e);
        }
        finally {
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (java.io.IOException ignored) {
                    // Closing a read-only stream failed; nothing useful can be done here.
                }
            }
        }
    }

    /**
     * Computes the TF-IDF map of every registered document. If no IDF table is
     * present it is first derived from the registered documents via
     * {@code TfIdf.idfFromTfs}.
     *
     * @return document id -> (word -> TF-IDF weight); the same map is kept internally
     */
    public Map<Object, Map<String, Double>> compute() {
        if (this.idf == null) {
            this.idf = TfIdf.idfFromTfs(this.tfMap.values());
        }
        // One entry per document is stored, so size the map by the document count.
        this.tfidfMap = new HashMap<Object, Map<String, Double>>(this.tfMap.size());
        for (Map.Entry<Object, Map<String, Double>> entry : this.tfMap.entrySet()) {
            Map<String, Double> tfidf = TfIdf.tfIdf(entry.getValue(), this.idf);
            this.tfidfMap.put(entry.getKey(), tfidf);
        }
        return this.tfidfMap;
    }

    /**
     * Same as {@link #getKeywordsOf(Object, int)} with a size of 10.
     *
     * @param id document identifier
     * @return word/weight entries, or {@code null} if {@code id} is unknown
     */
    public List<Map.Entry<String, Double>> getKeywordsOf(Object id) {
        return this.getKeywordsOf(id, 10);
    }

    /**
     * Returns the keywords of a registered document.
     *
     * <p>If {@link #compute()} has not been run yet, or documents were added
     * since (which clears the IDF table), the weights are computed first.
     *
     * @param id   document identifier
     * @param size number of keywords requested
     * @return word/weight entries, or {@code null} if {@code id} is unknown
     */
    public List<Map.Entry<String, Double>> getKeywordsOf(Object id, int size) {
        // Lazily (re)compute instead of failing with a NullPointerException or
        // answering from weights that predate the latest add().
        if (this.tfidfMap == null || this.idf == null) {
            this.compute();
        }
        Map<String, Double> tfidfs = this.tfidfMap.get(id);
        if (tfidfs == null) {
            return null;
        }
        return this.topN(tfidfs, size);
    }

    /** Feeds all entries into a {@code MaxHeap} of capacity {@code size}, ordered by weight, and returns its list. */
    private List<Map.Entry<String, Double>> topN(Map<String, Double> tfidfs, int size) {
        MaxHeap<Map.Entry<String, Double>> heap = new MaxHeap<Map.Entry<String, Double>>(size, new Comparator<Map.Entry<String, Double>>(){

            @Override
            public int compare(Map.Entry<String, Double> o1, Map.Entry<String, Double> o2) {
                return o1.getValue().compareTo(o2.getValue());
            }
        });
        heap.addAll(tfidfs.entrySet());
        return heap.toList();
    }

    /**
     * @return the ids of all registered documents (a live view of the internal map's key set)
     */
    public Set<Object> documents() {
        return this.tfMap.keySet();
    }

    /**
     * @return the internal document id -> term-frequency map (not a copy)
     */
    public Map<Object, Map<String, Double>> getTfMap() {
        return this.tfMap;
    }

    /**
     * @return the entries of {@link #allTf()} sorted by descending value
     */
    public List<Map.Entry<String, Double>> sortedAllTf() {
        return TfIdfCounter.sort(this.allTf());
    }

    /**
     * @return {@link #sortedAllTf()} with every value converted via {@code Double.intValue()}
     */
    public List<Map.Entry<String, Integer>> sortedAllTfInt() {
        return TfIdfCounter.doubleToInteger(this.sortedAllTf());
    }

    /**
     * Sums the per-document term-frequency values of every word across all documents.
     *
     * @return a new map of word -> summed value
     */
    public Map<String, Double> allTf() {
        Map<String, Double> result = new HashMap<String, Double>();
        for (Map<String, Double> d : this.tfMap.values()) {
            for (Map.Entry<String, Double> tf : d.entrySet()) {
                Double f = result.get(tf.getKey());
                if (f == null) {
                    result.put(tf.getKey(), tf.getValue());
                    continue;
                }
                result.put(tf.getKey(), f + tf.getValue());
            }
        }
        return result;
    }

    /** Copies the entries of {@code map} into a list sorted by descending value. */
    private static List<Map.Entry<String, Double>> sort(Map<String, Double> map) {
        List<Map.Entry<String, Double>> list = new ArrayList<Map.Entry<String, Double>>(map.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Double>>(){

            @Override
            public int compare(Map.Entry<String, Double> o1, Map.Entry<String, Double> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        return list;
    }

    /** Builds a new list whose entries carry the {@code intValue()} of each original value. */
    private static List<Map.Entry<String, Integer>> doubleToInteger(List<Map.Entry<String, Double>> list) {
        List<Map.Entry<String, Integer>> result = new ArrayList<Map.Entry<String, Integer>>(list.size());
        for (Map.Entry<String, Double> entry : list) {
            result.add(new AbstractMap.SimpleEntry<String, Integer>(entry.getKey(), entry.getValue().intValue()));
        }
        return result;
    }
}

