# Demo: fit a TF-IDF model on a tiny corpus, then show the TF-IDF weight of
# every vocabulary term that occurs in a single query sentence.
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

corpus = [
    'This is the first document.',
    'This is the second second document.',
    'And the third one.',
    'Is this the first document?',
]

tfidf = TfidfVectorizer()
# fit() returns the fitted vectorizer itself, so `re` refers to the same
# object as `tfidf`. The name is kept from the original script; note that it
# would shadow the stdlib `re` module if that were imported here.
re = tfidf.fit(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer get_feature_names_out() and fall back only on older releases.
# .tolist() keeps `name` a plain list of str, as the old API returned.
if hasattr(tfidf, 'get_feature_names_out'):
    name = tfidf.get_feature_names_out().tolist()
else:
    name = tfidf.get_feature_names()
print(re)

# transform() yields a one-row sparse matrix for the single query sentence:
# `indices` holds the vocabulary indices of the non-zero entries and `data`
# holds the matching TF-IDF weights, in the same order.
f = re.transform(['This is the first document.'])
score = f.data
i = f.indices

# Pair each term with its weight.
tfScore = [(name[idx], weight) for idx, weight in zip(i, score)]
print(tfScore)

# Re-order the (term, weight) pairs by ascending weight.
tfScore = sorted(tfScore, key=lambda pair: pair[1])
print(tfScore)

print(name)
print(f.indices)
print(f)
print(f.data)
# Published by: 全栈程序员-站长. When reposting, please credit the source: https://javaforall.net/222547.html (original link: https://javaforall.net)
