// Document.java

package edu.odu.cs.cs350.acmClassifier;

import java.util.ArrayList;

/**
 * A generic document, containing all the information Parser and Trainer need.
 *
 * <p>Instances can be duplicated either with the copy constructor
 * {@link #Document(Document)} or with {@link #clone()}; both produce a
 * Document whose lists are independent of the original's lists.
 */
public class Document implements Cloneable {
    /**
     * The raw text of the document.
     */
    public String rawText;

    /**
     * The tokenized, unpruned text of the document.
     */
    public ArrayList<String> tokenizedText;

    /**
     * An array of how many times each word in the document appears.
     */
    public ArrayList<Integer> wordCounts;

    /**
     * An array of normalized word counts.
     * Normalization has two steps:
     * 1. If word count is greater than 4, set normalized word count to 1.
     * Otherwise, set to 0.
     * 2. Multiply normalized word count by the inverse document frequency.
     */
    public ArrayList<Double> normalizedWordCounts;

    /**
     * The classification of the document.
     */
    public ACMClass classification;

    /**
     * Construct an empty, unclassified Document.
     *
     * <p>The raw text is the empty string, every list is empty, and the
     * classification is a default-constructed {@code ACMClass}.
     */
    public Document() {
        rawText = "";
        tokenizedText = new ArrayList<String>();
        wordCounts = new ArrayList<Integer>();
        normalizedWordCounts = new ArrayList<Double>();
        classification = new ACMClass();
    }

    /**
     * Copy constructor.
     *
     * <p>Each list is copied into a new {@code ArrayList}, so adding to or
     * removing from the copy's lists never affects {@code doc} (and vice
     * versa). The list elements themselves ({@code String}, {@code Integer},
     * {@code Double}) are immutable and can safely be shared.
     *
     * @param doc the document to copy from; must not be {@code null}
     * @throws NullPointerException if {@code doc} is {@code null}
     */
    public Document(Document doc) {
        if (doc == null) {
            throw new NullPointerException("doc must not be null");
        }

        // Strings are immutable, so sharing the reference is a safe copy.
        rawText = doc.rawText;

        tokenizedText = copyList(doc.tokenizedText);
        wordCounts = copyList(doc.wordCounts);
        normalizedWordCounts = copyList(doc.normalizedWordCounts);

        // NOTE(review): ACMClass is declared outside this file, so its copy
        // API is not visible here and the reference is shared with the
        // source document. If ACMClass is mutable, replace this with a
        // proper copy - TODO confirm.
        classification = doc.classification;
    }

    /**
     * Create a Document with the same contents as this one by delegating to
     * the copy constructor.
     *
     * <p>Because this uses {@code new Document(this)} rather than
     * {@code super.clone()}, the result is always exactly a {@code Document},
     * even when called on an instance of a subclass.
     *
     * @return a new Document copied from this one
     */
    @Override
    public Document clone() {
        return new Document(this);
    }

    /**
     * Return a textual representation of this Document.
     *
     * @return currently a fixed placeholder string
     */
    @Override
    public String toString() {
        // TODO
        return "I am a Document";
    }

    /**
     * Copy a list into a new, independent ArrayList.
     *
     * <p>The fields of this class are public and may have been set to
     * {@code null} by a caller, so a {@code null} source is copied as
     * {@code null} instead of failing.
     *
     * @param <T> the element type
     * @param source the list to copy; may be {@code null}
     * @return a new list with the same elements in the same order, or
     *         {@code null} if {@code source} is {@code null}
     */
    private static <T> ArrayList<T> copyList(ArrayList<T> source) {
        if (source == null) {
            return null;
        }
        return new ArrayList<T>(source);
    }
}