import re
import numpy as np
import pandas as pd
import nltk.tokenize as tk
import nltk.corpus as nc
handel_file = 'health_handel.csv'  # path for saving the tokenized data
# read the raw data
data = pd.read_excel('health.xlsx')
print(data.head(10))
stopwords = nc.stopwords.words('english')  # English stop words (requires nltk.download('stopwords') on first use)
tokenizer = tk.WordPunctTokenizer()  # splits text into word and punctuation tokens
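# A minimal sketch of how the tokenizer and stop-word list above could be applied
# before saving to handel_file; the column name 'text' is an assumption, since the
# actual columns of health.xlsx are not shown here.
def clean_text(text):
    # tokenize, lowercase, keep alphabetic tokens, and drop English stop words
    tokens = tokenizer.tokenize(str(text).lower())
    return ' '.join(t for t in tokens if t.isalpha() and t not in stopwords)

data['text'] = data['text'].apply(clean_text)  # assumed column name
data.to_csv(handel_file, index=False)          # write the tokenized data to CSV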