from textblob import TextBlob #textblob enters the scene
# NOTE(review): this file reads like an interactive-session transcript with the
# prompts removed: a bare expression is followed by the value it echoed.
gravity = TextBlob('A screaming comes across the sky.')
# .words: the word tokens of the blob; the final '.' is absent from the echoed result
gravity.words
WordList(['A', 'screaming', 'comes', 'across', 'the', 'sky'])
TextBlob('A screaming comes across the sky').words #it is a WordList object
WordList(['A', 'screaming', 'comes', 'across', 'the', 'sky'])
# a WordList supports len(), slicing and sorted() like a regular list
len(gravity.words)
6
gravity.words[:3]
WordList(['A', 'screaming', 'comes'])
# sorted() hands back a plain list; uppercase 'A' sorts ahead of the lowercase words
sorted(gravity.words)
['A', 'across', 'comes', 'screaming', 'sky', 'the']
# .tokens: unlike .words, punctuation is kept as its own token ('.')
gravity.tokens
WordList(['A', 'screaming', 'comes', 'across', 'the', 'sky', '.'])
# contractions and "Cannot" are split into two tokens each
TextBlob("Can't touch this.").tokens
WordList(['Ca', "n't", 'touch', 'this', '.'])
TextBlob('Cannot touch this').tokens
WordList(['Can', 'not', 'touch', 'this'])
# opening sentence of the novel, used below as a small part-of-speech tagging example
pride= TextBlob('It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.')
pride.tags #a list of (word, tag) string pairs; NN means noun, IN means preposition and JJ means adjective
[('It', 'PRP'), ('is', 'VBZ'), ('a', 'DT'), ('truth', 'NN'), ('universally', 'RB'), ('acknowledged', 'VBD'), ('that', 'IN'), ('a', 'DT'), ('single', 'JJ'), ('man', 'NN'), ('in', 'IN'), ('possession', 'NN'), ('of', 'IN'), ('a', 'DT'), ('good', 'JJ'), ('fortune', 'NN'), ('must', 'MD'), ('be', 'VB'), ('in', 'IN'), ('want', 'NN'), ('of', 'IN'), ('a', 'DT'), ('wife', 'NN')]
# Count the adjectives in the tagged sentence held by `pride`: one for every
# (word, tag) pair whose tag is exactly 'JJ'. The word half of each pair is
# not needed for the count, so it is bound to a throwaway name.
count = sum(1 for _word, tag in pride.tags if tag == 'JJ')
def adjs(word):
    """Return how many tokens of a tagged text are adjectives.

    Args:
        word: Any object exposing ``tags`` as an iterable of
            ``(token, tag)`` pairs -- in this session, a ``TextBlob``.
            (The parameter keeps its original name for compatibility even
            though it holds a whole text rather than a single word.)

    Returns:
        The number of pairs whose tag is exactly ``'JJ'``. Any other tag
        string, including longer ones that merely start with ``'JJ'``,
        is not counted.
    """
    # The token itself is irrelevant to the count; only the tag is inspected.
    # Using a separate name also avoids rebinding the `word` parameter.
    return sum(1 for _token, tag in word.tags if tag == 'JJ')
# adjs() applied to the two example blobs; each call is followed by its echoed result
adjs(pride)
2
adjs(gravity)
0
# the tags explain the 0 above: 'screaming' is tagged NN here and no pair carries JJ
gravity.tags
[('A', 'DT'), ('screaming', 'NN'), ('comes', 'VBZ'), ('across', 'IN'), ('the', 'DT'), ('sky', 'NN')]
# a two-sentence text, used to show sentence splitting
hr = TextBlob('All human beings are born free and equal in dignity and rights. They are endowed with reason and conscience and should act towards one another in a spirit of brotherhood.')
# .sentences: the blob divided into Sentence objects, one per sentence
hr.sentences
[Sentence("All human beings are born free and equal in dignity and rights."), Sentence("They are endowed with reason and conscience and should act towards one another in a spirit of brotherhood.")]
len(hr.sentences)
2
# Load both novels into plain strings. The `with` form performs the same three
# steps (open, read, close) but closes the file even if read() raises.
# Note: `pride` is rebound here from the one-sentence TextBlob defined earlier
# to the raw text of the whole file.
with open('pride') as source: #the 3 steps to upload text
    pride = source.read()
with open('moby') as source2:
    moby = source2.read()
# pride and moby are plain strings at this point, so slicing works per character
pride[:100]
'PRIDE & PREJUDICE.\n\n\n\n\nCHAPTER I.\n\n\nIt is a truth universally acknowledged, that a single man in pos'
pride[50:60]
'universall'
pride[50:100]
'universally acknowledged, that a single man in pos'
moby[:100]
'MOBY-DICK;\n\nor, THE WHALE.\n\nBy Herman Melville\n\n\n\nCONTENTS\n\nETYMOLOGY.\n\nEXTRACTS (Supplied by a Sub-'
moby[50:100]
'CONTENTS\n\nETYMOLOGY.\n\nEXTRACTS (Supplied by a Sub-'
# wrap the full text in a TextBlob to get words, tokens and tags for the whole novel
prideblob = TextBlob(pride)
len(prideblob.words)
123090
# more tokens than words; presumably the difference is punctuation, which
# .tokens kept and .words dropped in the gravity example
len(prideblob.tokens)
144564
# number of tokens tagged 'JJ' in the whole novel
adjs(prideblob)
7110
mobyblob = TextBlob(moby)
adjs(mobyblob)
19018
# adjective count divided by word count: the share of words tagged 'JJ'
adjs(mobyblob) / len(mobyblob.words)
0.08570952142738035
# Read the word-list file and break it into individual entries. The `with`
# block closes the file even if reading fails part-way.
with open('English_word_list') as source:
    words = source.read().split() #it splits this giant string into individual words
words[:5] #peek at the first five entries of the loaded word list
['A', "A's", 'AA', "AA's", 'AAA']
# Print every entry that consists of the same string written twice
# (e.g. 'bonbon'). An odd-length entry can never match because its two
# slices have different lengths.
for w in words:
    half = len(w) // 2  # split point, computed once per word
    if w[:half] == w[half:]:
        print(w)
AA BB DD ISIS LL MM PP RR SS WW beriberi bonbon cancan cc chichi coco couscous dd dodo dumdum ff froufrou gaga hotshots ii ll lulu mama meme mm murmur muumuu papa pawpaw pompom pp tartar testes tsetse tutu xx
from textblob import Word #from textblob we import Word, the class queried for synsets and definitions below
bank_word = Word('bank') # a variable for "bank" as an entry inside WordNet
bank_word.synsets #the sets of synonyms, each pertaining to a different sense of bank_word (== bank)
[Synset('bank.n.01'), Synset('depository_financial_institution.n.01'), Synset('bank.n.03'), Synset('bank.n.04'), Synset('bank.n.05'), Synset('bank.n.06'), Synset('bank.n.07'), Synset('savings_bank.n.02'), Synset('bank.n.09'), Synset('bank.n.10'), Synset('bank.v.01'), Synset('bank.v.02'), Synset('bank.v.03'), Synset('bank.v.04'), Synset('bank.v.05'), Synset('deposit.v.02'), Synset('bank.v.07'), Synset('trust.v.01')]
bank_word.definitions # accessed as a property (no parentheses), unlike the method call definition() used on a single synset further down
['sloping land (especially the slope beside a body of water)', 'a financial institution that accepts deposits and channels the money into lending activities', 'a long ridge or pile', 'an arrangement of similar objects in a row or in tiers', 'a supply or stock held in reserve for future use (especially in emergencies)', 'the funds held by a gambling house or the dealer in some gambling games', 'a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force', 'a container (usually with a slot in the top) for keeping money at home', 'a building in which the business of banking transacted', 'a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)', 'tip laterally', 'enclose with a bank', 'do business with a bank or keep an account at a bank', 'act as the banker in a game or in gambling', 'be in the banking business', 'put into a bank account', 'cover with ashes so to control the rate of burning', 'have confidence or faith in']
# pick individual senses of 'bank' by position in the synsets list
bank1 = bank_word.synsets[0]
# lemma_names(): the words that belong to this synset
bank1.lemma_names()
['bank']
bank_word.synsets[1].lemma_names()
['depository_financial_institution', 'bank', 'banking_concern', 'banking_company']
bank3 = bank_word.synsets[2]
bank3.lemma_names()
['bank']
bank3
Synset('bank.n.03')
# same single lemma name, yet the two synsets compare unequal: they are different senses
bank1 == bank3
False
bank1
Synset('bank.n.01')
#we had both noun synsets like 'bank.n.01' and verb synsets like 'bank.v.01'
from textblob.wordnet import NOUN #how to get only the noun synsets of a word
bank_word.get_synsets(NOUN)
[Synset('bank.n.01'), Synset('depository_financial_institution.n.01'), Synset('bank.n.03'), Synset('bank.n.04'), Synset('bank.n.05'), Synset('bank.n.06'), Synset('bank.n.07'), Synset('savings_bank.n.02'), Synset('bank.n.09'), Synset('bank.n.10')]
# same call with VERB returns only the verb synsets
from textblob.wordnet import VERB
bank_word.get_synsets(VERB)
[Synset('bank.v.01'), Synset('bank.v.02'), Synset('bank.v.03'), Synset('bank.v.04'), Synset('bank.v.05'), Synset('deposit.v.02'), Synset('bank.v.07'), Synset('trust.v.01')]
car_word = Word('car')
car_word.get_synsets(NOUN) #get_synsets(NOUN or VERB)
#synsets are ordered by how common each sense is
[Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'), Synset('cable_car.n.01')]
car1 = car_word.get_synsets(NOUN)[0] # car1 is the first noun synset of 'car', i.e. car.n.01
car1.definition()
'a motor vehicle with four wheels; usually propelled by an internal combustion engine'
# hypernyms() returns a list of synsets; here the single, more general 'motor_vehicle'
car1.hypernyms()
[Synset('motor_vehicle.n.01')]
car1.hypernyms()[0]
Synset('motor_vehicle.n.01')
car1.hypernyms()[0].lemma_names()
['motor_vehicle', 'automotive_vehicle']
car1.hyponyms() #the more specific synsets, i.e. the types of car
[Synset('ambulance.n.01'), Synset('beach_wagon.n.01'), Synset('bus.n.04'), Synset('cab.n.03'), Synset('compact.n.03'), Synset('convertible.n.01'), Synset('coupe.n.01'), Synset('cruiser.n.01'), Synset('electric.n.01'), Synset('gas_guzzler.n.01'), Synset('hardtop.n.01'), Synset('hatchback.n.01'), Synset('horseless_carriage.n.01'), Synset('hot_rod.n.01'), Synset('jeep.n.01'), Synset('limousine.n.01'), Synset('loaner.n.02'), Synset('minicar.n.01'), Synset('minivan.n.01'), Synset('model_t.n.01'), Synset('pace_car.n.01'), Synset('racer.n.02'), Synset('roadster.n.01'), Synset('sedan.n.01'), Synset('sport_utility.n.01'), Synset('sports_car.n.01'), Synset('stanley_steamer.n.01'), Synset('stock_car.n.01'), Synset('subcompact.n.01'), Synset('touring_car.n.01'), Synset('used-car.n.01')]
# take the first noun synset of each word as its representative sense
river = Word('river').get_synsets(NOUN)[0]
river.definition()
'a large natural stream of water (larger than a creek)'
enigma = Word('enigma').get_synsets(NOUN)[0]
enigma.definition()
'something that baffles understanding and cannot be explained'
river.path_similarity(enigma) # path_similarity scores how close or far apart two synsets are
0.06666666666666667
# NOTE(review): a larger score presumably means the senses are closer -- confirm
# against the WordNet docs; of the pairs below, river/bank1 scores highest
river.path_similarity(bank1)
0.1111111111111111
car1.path_similarity(enigma)
0.05
car1.path_similarity(bank1)
0.08333333333333333
enigma.path_similarity(bank1)
0.06666666666666667
import re
things = ['counter','original','spare','strange','fickle','freckled','swift','slow','sweet','sour','adazzle','dim']
def pied(words):
    """Print a five-line verse with twelve words slotted into it.

    The verse appears to be modeled on the closing lines of Hopkins's
    "Pied Beauty". Words are consumed in order: four on the first line,
    two on the second, and three comma-separated pairs on the third.

    Args:
        words: Sequence holding at least twelve items; only the first
            twelve are used.

    Returns:
        None. The verse is written to standard output.

    Raises:
        IndexError: If fewer than twelve words are supplied.
    """
    # Spacing fixes relative to the earlier concatenation: a space after every
    # separator, a space before "(who knows how?)", and "With " no longer
    # fused to the following word. The third line joins each pair with a
    # comma and separates the pairs with semicolons.
    template = (
        'All things {0}, {1}, {2}, {3};\n'
        ' Whatever is {4}, {5} (who knows how?)\n'
        ' With {6}, {7}; {8}, {9}; {10}, {11};\n'
        'He fathers-forth whose beauty is past change:\n'
        ' Praise him'
    )
    verse = template.format(*words)
    print(verse)
pied(things)
All things counter,original,spare,strange; Whatever is fickle,freckled(who knows how?) Withswift,slow;sweet;sour;adazzle,dim; He fathers-forth whose beauty is past change: Praise him