package ch.akuhn.hapax.index;

import ch.akuhn.foreach.Each;
import ch.akuhn.foreach.For;
import ch.akuhn.hapax.corpus.Corpus;
import ch.akuhn.hapax.corpus.PorterStemmer;
import ch.akuhn.hapax.corpus.Stemmer;
import ch.akuhn.hapax.corpus.Stopwords;
import ch.akuhn.hapax.corpus.Terms;
import ch.akuhn.hapax.linalg.SVD;
import ch.akuhn.hapax.linalg.SparseMatrix;
import ch.akuhn.hapax.linalg.Vector;
import ch.akuhn.util.Bag;
import ch.akuhn.util.IntArray;
import ch.akuhn.util.Pair;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;

/* loaded from: input_file:ch/akuhn/hapax/index/TermDocumentMatrix.class */
public class TermDocumentMatrix extends Corpus {
    private static final int DEFAULT_DIMENSIONS = 25;
    private AssociativeList<String> documents;
    private double[] globalWeightings;
    private IntArray lengthArray;
    private SparseMatrix matrix;
    private AssociativeList<String> terms;

    public TermDocumentMatrix() {
        this.matrix = new SparseMatrix(0, 0);
        this.terms = new AssociativeList<>();
        this.documents = new AssociativeList<>();
        this.lengthArray = new IntArray();
    }

    private TermDocumentMatrix(AssociativeList<String> associativeList, AssociativeList<String> associativeList2, IntArray intArray) {
        this.matrix = new SparseMatrix(associativeList.size(), associativeList2.size());
        this.terms = associativeList.m6clone();
        this.documents = associativeList2.m6clone();
        this.lengthArray = intArray.clone();
    }

    private void addToRow(String str, Vector vector) {
        this.matrix.addToRow(indexTerm(str), vector);
    }

    @Override // ch.akuhn.hapax.corpus.Corpus
    public void putDocument(String str, Terms terms) {
        int indexDocument = indexDocument(str);
        this.lengthArray.add(indexDocument, terms.size());
        Iterator it = terms.counts().iterator();
        while (it.hasNext()) {
            this.matrix.add(indexTerm((String) ((Bag.Count) it.next()).element), indexDocument, r0.count);
        }
    }

    @Override // ch.akuhn.hapax.corpus.Corpus
    public boolean containsDocument(String str) {
        return this.documents.contains(str);
    }

    public LatentSemanticIndex createIndex() {
        return createIndex(DEFAULT_DIMENSIONS);
    }

    public LatentSemanticIndex createIndex(int i) {
        return new LatentSemanticIndex(this.terms, this.documents, new SVD(this.matrix, i)).initializeGlobalWeightings(this.globalWeightings).initializeDocumentLength(this.lengthArray.asIntArray());
    }

    public double density() {
        return this.matrix.density();
    }

    @Override // ch.akuhn.hapax.corpus.Corpus
    public Iterable<String> documents() {
        return this.documents;
    }

    @Override // ch.akuhn.hapax.corpus.Corpus
    public int documentCount() {
        return this.documents.size();
    }

    private int indexTerm(String str) {
        int add = this.terms.add(str);
        if (add == this.matrix.rowCount()) {
            this.matrix.addRow();
        }
        return add;
    }

    private int indexDocument(String str) {
        int add = this.documents.add(str);
        if (add == this.matrix.columnCount()) {
            this.matrix.addColumn();
        }
        return add;
    }

    public TermDocumentMatrix rejectAndWeight() {
        return toLowerCase().rejectHapaxes().rejectStopwords().stem().weight(LocalWeighting.TERM, GlobalWeighting.IDF);
    }

    public TermDocumentMatrix rejectHapaxes() {
        return rejectLegomena(1);
    }

    public TermDocumentMatrix rejectLegomena(int i) {
        TermDocumentMatrix termDocumentMatrix = new TermDocumentMatrix(new AssociativeList(), this.documents, this.lengthArray);
        for (Pair<String, Vector> pair : termRowPairs()) {
            if (((Vector) pair.snd).used() > i) {
                termDocumentMatrix.addToRow((String) pair.fst, (Vector) pair.snd);
            }
        }
        return termDocumentMatrix;
    }

    public TermDocumentMatrix rejectStopwords() {
        return rejectStopwords(Stopwords.BASIC_ENGLISH);
    }

    public TermDocumentMatrix rejectStopwords(Stopwords stopwords) {
        TermDocumentMatrix termDocumentMatrix = new TermDocumentMatrix(new AssociativeList(), this.documents, this.lengthArray);
        for (Pair<String, Vector> pair : termRowPairs()) {
            if (!stopwords.contains(pair.fst)) {
                termDocumentMatrix.addToRow((String) pair.fst, (Vector) pair.snd);
            }
        }
        return termDocumentMatrix;
    }

    public TermDocumentMatrix stem() {
        return stem(new PorterStemmer());
    }

    public TermDocumentMatrix stem(Stemmer stemmer) {
        TermDocumentMatrix termDocumentMatrix = new TermDocumentMatrix(new AssociativeList(), this.documents, this.lengthArray);
        for (Pair<String, Vector> pair : termRowPairs()) {
            termDocumentMatrix.addToRow(stemmer.stem((CharSequence) pair.fst), (Vector) pair.snd);
        }
        return termDocumentMatrix;
    }

    @Override // ch.akuhn.hapax.corpus.Corpus
    public Terms terms() {
        Terms terms = new Terms();
        for (Pair<String, Vector> pair : termRowPairs()) {
            terms.add((String) pair.fst, (int) ((Vector) pair.snd).sum());
        }
        return terms;
    }

    private Iterable<Pair<String, Vector>> termRowPairs() {
        return Pair.zip(this.terms, this.matrix.rows());
    }

    @Override // ch.akuhn.hapax.corpus.Corpus
    public int termCount() {
        return this.terms.size();
    }

    public TermDocumentMatrix toLowerCase() {
        TermDocumentMatrix termDocumentMatrix = new TermDocumentMatrix(new AssociativeList(), this.documents, this.lengthArray);
        for (Pair<String, Vector> pair : termRowPairs()) {
            termDocumentMatrix.addToRow(((String) pair.fst).toString().toLowerCase(), (Vector) pair.snd);
        }
        return termDocumentMatrix;
    }

    public TermDocumentMatrix weight(LocalWeighting localWeighting, GlobalWeighting globalWeighting) {
        TermDocumentMatrix termDocumentMatrix = new TermDocumentMatrix(this.terms, this.documents, this.lengthArray);
        termDocumentMatrix.globalWeightings = new double[this.terms.size()];
        for (Each each : For.withIndex(this.matrix.rows())) {
            double[] dArr = termDocumentMatrix.globalWeightings;
            int i = each.index;
            double weight = globalWeighting.weight((Vector) each.value);
            dArr[i] = weight;
            for (Vector.Entry entry : ((Vector) each.value).entries()) {
                termDocumentMatrix.matrix.put(each.index, entry.index, localWeighting.weight(entry.value) * weight);
            }
        }
        return termDocumentMatrix;
    }

    @Override // ch.akuhn.hapax.corpus.Corpus
    public Terms getDocument(String str) {
        int i = this.documents.get((AssociativeList<String>) str);
        if (i == -1) {
            throw new NoSuchElementException();
        }
        Terms terms = new Terms();
        for (Pair<String, Vector> pair : termRowPairs()) {
            terms.add((String) pair.fst, (int) ((Vector) pair.snd).get(i));
        }
        return terms;
    }

    public SparseMatrix matrix() {
        return this.matrix;
    }

    public int[] getAllDocumentLength() {
        int[] iArr = new int[this.documents.size()];
        Iterator<Vector> it = this.matrix.rows().iterator();
        while (it.hasNext()) {
            for (Vector.Entry entry : it.next().entries()) {
                iArr[entry.index] = (int) (iArr[r1] + entry.value);
            }
        }
        return iArr;
    }

    public int getDocumentLength(String str) {
        int i = this.documents.get((AssociativeList<String>) str);
        if (i < 0) {
            return -1;
        }
        return this.lengthArray.get(i);
    }
}
