namespace Test.TFIDF
{
class IF_IDF
{
/// <summary>
/// Counts how many times each Chinese character occurs in the given text.
/// </summary>
/// <remarks>
/// Only characters in the range U+4E00..U+9FA5 are matched, one character per
/// match, so every key of the result is a single-character string. No word
/// segmentation is performed.
/// </remarks>
/// <param name="text">The text to scan.</param>
/// <returns>
/// A dictionary mapping each matched character to its number of occurrences;
/// empty when <paramref name="text"/> contains no matching character.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown by the regex engine when <paramref name="text"/> is null.
/// </exception>
public Dictionary<string, int> GetWordsFrequnce(string text)
{
    Dictionary<string, int> frequencies = new Dictionary<string, int>();

    // Pick out the Chinese characters; each match is exactly one character.
    foreach (Match match in Regex.Matches(text, @"[\u4e00-\u9fa5]"))
    {
        // TryGetValue leaves count at 0 for an unseen key, so a single
        // indexer write handles both the "first seen" and "seen again" cases.
        int count;
        frequencies.TryGetValue(match.Value, out count);
        frequencies[match.Value] = count + 1;
    }

    return frequencies;
}
/// <summary>
/// Returns the occurrence count of the most frequent entry in a frequency dictionary.
/// </summary>
/// <param name="wordsfre">
/// Dictionary mapping each extracted term to its number of occurrences.
/// </param>
/// <returns>
/// The largest value stored in <paramref name="wordsfre"/>, or 0 when the
/// dictionary is empty or contains no value greater than 0.
/// </returns>
public int MaxWordFrequence(Dictionary<string, int> wordsfre)
{
    // Starting from 0 means an empty dictionary yields 0 rather than throwing.
    int highest = 0;

    foreach (int frequency in wordsfre.Values)
    {
        if (frequency > highest)
        {
            highest = frequency;
        }
    }

    return highest;
}
/// <summary>
/// Computes the IF of a given word and returns the result.
/// </summary>
///
///
///
1