from nltk.book import *
>>> type(text1)
<class 'nltk.text.Text'>
http://nltk.googlecode.com/svn/trunk/doc/api/nltk.text.Text-class.html
text1.concordance("monstrous")
text1.similar("monstrous")
sorted(set(text3))
>>> f = FreqDist(text1)
>>> f
<FreqDist with 19317 samples and 260819 outcomes>
http://nltk.googlecode.com/svn/trunk/doc/api/nltk.probability.FreqDist-class.html
>>> v = f.keys()
>>> type(v)
<type 'list'>
>>> V = set(text1)
>>> len(V) == len(v)
True
>>> long_words = [w for w in V if len(w) > 15]
>>> sorted(long_words)
Function | Meaning |
---|---|
s.startswith(t) | test if s starts with t |
s.endswith(t) | test if s ends with t |
t in s | test if t is contained inside s |
s.islower() | test if all cased characters in s are lowercase |
s.isupper() | test if all cased characters in s are uppercase |
s.isalpha() | test if all characters in s are alphabetic |
s.isalnum() | test if all characters in s are alphanumeric |
s.isdigit() | test if all characters in s are digits |
s.istitle() | test if s is titlecased (all words in s have initial capitals) |
bigrams(text1)
>>> [w.upper() for w in s]  # uppercase all elements
>>> len(set([word.lower() for word in text1]))