# import TextBlob tokeniser:

from textblob import TextBlob


# providing a short sentence as an argument:

gravity = TextBlob('A screaming comes across the sky.')


# viewing the words inside a sentence as a WordList object:

gravity.words

WordList(['A', 'screaming', 'comes', 'across', 'the', 'sky'])


# another way to get the same result:

TextBlob('A screaming comes across the sky.').words

WordList(['A', 'screaming', 'comes', 'across', 'the', 'sky'])


# inspecting the variable itself:

gravity

TextBlob("A screaming comes across the sky.")


# what is the length of the WordList:

len(gravity.words)

6


# see the first 3 words in the list:

gravity.words[:3]

WordList(['A', 'screaming', 'comes'])


# sorting the words alphabetically:

sorted(gravity.words)

['A', 'across', 'comes', 'screaming', 'sky', 'the']


# see the tokens of the text - unlike .words, punctuation is also part of them:

gravity.tokens

WordList(['A', 'screaming', 'comes', 'across', 'the', 'sky', '.'])


# what elements are taken as tokens, besides words:

TextBlob("Can't touch this.").tokens

WordList(['Ca', "n't", 'touch', 'this', '.'])


# the tokeniser models words linguistically: "Cannot" is split into "Can" and "not":

TextBlob('Cannot touch this.').tokens

WordList(['Can', 'not', 'touch', 'this', '.'])


# define a variable "pride"

pride = TextBlob('It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.')


# using the POS tagger (built into TextBlob) to tell us the part of speech of each word in the sentence:

pride.tags

# >> the result is a list of pairs of strings
# >> you can see the part-of-speech tags

[('It', 'PRP'),
 ('is', 'VBZ'),
 ('a', 'DT'),
 ('truth', 'NN'),
 ('universally', 'RB'),
 ('acknowledged', 'VBD'),
 ('that', 'IN'),
 ('a', 'DT'),
 ('single', 'JJ'),
 ('man', 'NN'),
 ('in', 'IN'),
 ('possession', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('good', 'JJ'),
 ('fortune', 'NN'),
 ('must', 'MD'),
 ('be', 'VB'),
 ('in', 'IN'),
 ('want', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('wife', 'NN')]


# counting the adjectives:

# tally every (word, tag) pair whose tag is exactly 'JJ' (adjective)
count = sum(1 for _word, pos in pride.tags if pos == 'JJ')


# see the result:

count

2


# a function that counts the adjectives in a text:

def adjs(text):
    """Return the number of adjectives in `text`.

    `text` must expose a `.tags` attribute that yields (word, tag) pairs,
    as a TextBlob does. Only the exact tag 'JJ' is counted.
    """
    # Iterate over the tags of the argument itself; the previous version
    # read the global `pride`, so every call returned the count for `pride`.
    return sum(1 for _word, tag in text.tags if tag == 'JJ')


adjs(pride)

2


adjs(gravity)

2


gravity

TextBlob("A screaming comes across the sky.")


hr = TextBlob('All human beings are born free and equal in dignity and rights. They are endowed with reason and conscience and should act towards one another in a spirit of brotherhood.')


# check out the different sentences in a text:

hr.sentences

# >> result - a list of sentence objects

[Sentence("All human beings are born free and equal in dignity and rights."),
 Sentence("They are endowed with reason and conscience and should act towards one another in a spirit of brotherhood.")]


pride.sentences

[Sentence("It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.")]


gravity.sentences

[Sentence("A screaming comes across the sky.")]


# check the length - how many sentences are there:

len(hr.sentences)

2

[15] Text III: Advanced Text Processing

[15.1] Intro

[15.2] Words and Sentences

[15.3] Adjective Counting with Part-of-Speech Tagging

[15.4] Sentence Counting with a Tokenizer