# encoding=utf-8
"""Fit a TF-IDF model on a four-sentence toy corpus and print the result.

The script prints two things:

1. the vocabulary learned by ``TfidfVectorizer`` (one entry per column of
   the matrix), and
2. the TF-IDF matrix returned by ``fit_transform``.
"""
from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer

# Toy corpus. Note the repeated words ("This This", "second second") in the
# first two documents.
corpus = [
    'This This is the first document.',
    'This This is the second second document.',
    'And the third one.',
    'Is this the first document?',
]

# Default settings; fit_transform() fits the model on the corpus and returns
# the TF-IDF matrix for those same documents in a single call.
tfidf_model = TfidfVectorizer()
tfidf_matrix = tfidf_model.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer get_feature_names_out() when it exists and fall back to the old
# method on older releases. .tolist() converts the returned array to a plain
# list of str so that word_dict keeps the type (and printed form) it had with
# the old API.
if hasattr(tfidf_model, 'get_feature_names_out'):
    word_dict = tfidf_model.get_feature_names_out().tolist()
else:
    word_dict = tfidf_model.get_feature_names()

print(word_dict)
print(tfidf_matrix)
实验结果:
"C:\Program Files\Anaconda3\python.exe" D:/pycharmprogram/csgwork/find_classification_keys/test_tfidfVectorizer.py ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this'] (0, 8) 0.6986804246371375 (0, 3) 0.34934021231856877 (0, 6) 0.2856085141790751 (0, 2) 0.43150466158747897 (0, 1) 0.34934021231856877 (1, 8) 0.49256714844677196 (1, 3) 0.24628357422338598 (1, 6) 0.20135295972313796 (1, 1) 0.24628357422338598 (1, 5) 0.7717016211057586 (2, 6) 0.2884767487500274 (2, 0) 0.5528053199908667 (2, 7) 0.5528053199908667 (2, 4) 0.5528053199908667 (3, 8) 0.4387767428592343 (3, 3) 0.4387767428592343 (3, 6) 0.35872873824808993 (3, 2) 0.5419765697264572 (3, 1) 0.4387767428592343 Process finished with exit code 0