词性(Part of Speech, POS)。最常用的POS标注规范(notation)是宾州树库(Penn Treebank, PTB)标记集。
# POS tagging >>>import nltk>>>from nltk import word_tokenize>>>s="I was watching TV">>>print nltk.pos_tag(word_tokenize(s))# all nouns>>>tagged=nltk.pos_tag(word_tokenize(s))>>>allnoun=[word for word,pos in tagged if pos in['NN','NNP']]
# NER tagger
>>>import nltk
>>>from nltk import ne_chunk
>>>from nltk import word_tokenize
>>>sent = "Mark is studying at Stanford University in California"
>>>print(ne_chunk(nltk.pos_tag(word_tokenize(sent)), binary=False))
# NER stanford tagger
>>>from nltk.tag.stanford import NERTagger
>>>st = NERTagger('<PATH>/stanford-ner/classifiers/all.3class.distsim.crf.ser.gz',... '<PATH>/stanford-ner/stanford-ner.jar')
# <PATH> will be the relative path where you downloaded the tagger
>>>st.tag('Rami Eid is studying at Stony Brook University in NY'.split())
#http://nlp.stanford.edu/software/