上传者: 38726712
|
上传时间: 2021-11-04 21:22:31
|
文件大小: 513KB
|
文件类型: -
import re
import numpy as np
import pandas as pd
import nltk.tokenize as tk
import nltk.corpus as nc
# Path of the data file where the tokenized result is to be saved.
handel_file = "health_handel.csv"

# Load the raw data from the Excel workbook and show its first ten rows.
data = pd.read_excel(io="health.xlsx")
print(data.head(n=10))

# English stop words taken from the NLTK stopwords corpus.
stopwords = nc.stopwords.words("english")

# Tokenizer instance used for word segmentation.
tokenizer = tk.WordPunctTokenizer()