package org.mcraig.cs445.refentry;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectOutputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Properties;
import java.util.StringTokenizer;
/**
* Serializes a HashSet of common words to ignore when indexing and
* searching.
* @author <a href="mailto:mark@mcraig.org">Mark Craig</a>
*/
class CommonWordsGenerator {

    /** Name of the property whose value is the list of common words. */
    private static final String COMMON_WORDS_PROPERTY = "CommonWords";

    /** Tokens shorter than this many characters are left out of the set. */
    private static final int MIN_WORD_LENGTH = 3;

    /** Default constructor. */
    public CommonWordsGenerator() { }

    /**
     * Generate a serialized HashSet of common words.
     * <p>
     * The value of the <code>CommonWords</code> property is split on
     * whitespace; every token of at least three characters is lowercased
     * and added to the set, which is then written with Java serialization.
     * <p>
     * Failures are reported on <code>System.err</code> and are not
     * propagated. If the properties file cannot be loaded or does not
     * define <code>CommonWords</code>, no output file is written.
     *
     * @param properties Path to properties file containing
     * <code>CommonWords</code> property having as its value the list
     * of common words to ignore.
     * @param serialized Path to serialized output HashSet.
     */
    public void generate(String properties, String serialized) {
        String wordList = readCommonWords(properties);
        if (wordList == null) {
            // Already reported; there is nothing to serialize.
            return;
        }

        HashSet words = new HashSet();
        StringTokenizer in = new StringTokenizer(wordList);
        while (in.hasMoreTokens()) {
            String tok = in.nextToken();
            if (tok.length() >= MIN_WORD_LENGTH) {
                // NOTE(review): toLowerCase() uses the default locale;
                // confirm the code reading this set lowercases the same way.
                words.add(tok.toLowerCase());
            }
        }

        writeWords(words, serialized);
    }

    /**
     * Load the properties file and return the value of the
     * <code>CommonWords</code> property.
     *
     * @param properties Path to the properties file.
     * @return The property value, or <code>null</code> (after reporting on
     * <code>System.err</code>) if the file could not be loaded or the
     * property is not defined.
     */
    private String readCommonWords(String properties) {
        Properties props = new Properties();
        FileInputStream fip = null;
        try {
            fip = new FileInputStream(properties);
            props.load(new BufferedInputStream(fip));
        } catch (Exception e) {
            System.err.println("In CommonWordsGenerator::generate...");
            System.err.println("Failed to load properties: " + properties);
            System.err.println(e.toString());
            return null;
        } finally {
            // Release the file handle on the failure path as well.
            if (fip != null) {
                try {
                    fip.close();
                } catch (Exception ignored) {
                    // Nothing useful to do if closing a read-only stream fails.
                }
            }
        }

        String wordList = props.getProperty(COMMON_WORDS_PROPERTY);
        if (wordList == null) {
            // StringTokenizer rejects null, so report the missing key here.
            System.err.println("In CommonWordsGenerator::generate...");
            System.err.println("Missing " + COMMON_WORDS_PROPERTY
                    + " property in: " + properties);
        }
        return wordList;
    }

    /**
     * Serialize the set of words to the given file, reporting any failure
     * on <code>System.err</code>.
     *
     * @param words Set of words to write.
     * @param serialized Path to serialized output HashSet.
     */
    private void writeWords(HashSet words, String serialized) {
        FileOutputStream file = null;
        try {
            file = new FileOutputStream(serialized);
            ObjectOutputStream os =
                    new ObjectOutputStream(new BufferedOutputStream(file));
            os.writeObject(words);
            // Closing flushes the buffered data; a failure here is reported
            // below like any other write failure.
            os.close();
            file = null;
        } catch (Exception e) {
            System.err.println("In CommonWordsGenerator::generate...");
            System.err.println("Failed to serialze HashSet: " + serialized);
            System.err.println(e.toString());
        } finally {
            // Still non-null only if something failed before the normal close.
            if (file != null) {
                try {
                    file.close();
                } catch (Exception ignored) {
                    // The original failure has already been reported.
                }
            }
        }
    }
}