# Importaciones necesarias
import nltk
from nltk.tag.stanford import StanfordNERTagger

# Initialise the Stanford NER tagger.
# It needs two filesystem locations: the Stanford NER jar and a serialized
# classifier model. These are absolute paths on the author's machine and
# must be adjusted to wherever the Stanford NER distribution was unpacked.
d1='C:/Users/usuario/Downloads/stanford-ner-2020-11-17/stanford-ner.jar'
# Model file: going by its name, the English 3-class classifier.
d2='C:/Users/usuario/Downloads/stanford-ner-2020-11-17/classifiers/english.all.3class.distsim.crf.ser.gz'
PATH_TO_JAR = d1
PATH_TO_MODEL = d2
tagger = StanfordNERTagger(model_filename=PATH_TO_MODEL,
                           path_to_jar=PATH_TO_JAR, 
                           encoding='utf-8')

# Sample English text used to exercise the tagger.
sentence = """First up in London will be Riccardo Tisci, onetime Givenchy darling, 
favorite of Kardashian-Jenners everywhere, who returns to the catwalk with men’s 
and women’s wear after a year and a half away, this time to reimagine Burberry 
after the departure of Christopher Bailey."""      

# Split the text into word/punctuation tokens.
words = nltk.word_tokenize(sentence) 

# Feed the tokens to the tagger; the result is a list of (token, label) pairs.
tagged = tagger.tag(words)
# Bare expression: presumably left as the last line so the notebook displays it.
tagged

[('First', 'O'),
 ('up', 'O'),
 ('in', 'O'),
 ('London', 'LOCATION'),
 ('will', 'O'),
 ('be', 'O'),
 ('Riccardo', 'PERSON'),
 ('Tisci', 'PERSON'),
 (',', 'O'),
 ('onetime', 'O'),
 ('Givenchy', 'ORGANIZATION'),
 ('darling', 'O'),
 (',', 'O'),
 ('favorite', 'O'),
 ('of', 'O'),
 ('Kardashian-Jenners', 'O'),
 ('everywhere', 'O'),
 (',', 'O'),
 ('who', 'O'),
 ('returns', 'O'),
 ('to', 'O'),
 ('the', 'O'),
 ('catwalk', 'O'),
 ('with', 'O'),
 ('men', 'O'),
 ('’', 'O'),
 ('s', 'O'),
 ('and', 'O'),
 ('women', 'O'),
 ('’', 'O'),
 ('s', 'O'),
 ('wear', 'O'),
 ('after', 'O'),
 ('a', 'O'),
 ('year', 'O'),
 ('and', 'O'),
 ('a', 'O'),
 ('half', 'O'),
 ('away', 'O'),
 (',', 'O'),
 ('this', 'O'),
 ('time', 'O'),
 ('to', 'O'),
 ('reimagine', 'O'),
 ('Burberry', 'O'),
 ('after', 'O'),
 ('the', 'O'),
 ('departure', 'O'),
 ('of', 'O'),
 ('Christopher', 'PERSON'),
 ('Bailey', 'PERSON'),
 ('.', 'O')]


# Importacion necesaria
import wikipedia

# Query the English-language Wikipedia ('en'); the text is later passed to
# the tagger that was loaded from an English model file.
wikipedia.set_lang('en')

# Fetch the summary of the article, limited to 10 sentences.
result = wikipedia.summary('Albert Einstein', sentences=10)

# Split the summary into word/punctuation tokens.
words = nltk.word_tokenize(result) 

# Feed the tokens to the tagger; the result is a list of (token, label) pairs.
tagged = tagger.tag(words)


# Show what the tagger managed to label: print every (token, label) pair
# whose label is not 'O' (the label carried by tokens that are not entities).
# Iterates over the pairs directly instead of indexing with range(len(...)).
for entry in tagged:
    if entry[1] != 'O':
        print(entry)

('Albert', 'PERSON')
('Einstein', 'PERSON')
('Einstein', 'PERSON')
('Einstein', 'PERSON')
('Einstein', 'PERSON')
('Einstein', 'PERSON')


# Re-initialise the Stanford NER tagger with a different model.
# It needs two filesystem locations: the Stanford NER jar and a serialized
# classifier model (absolute paths on the author's machine).
d1='C:/Users/usuario/Downloads/stanford-ner-2020-11-17/stanford-ner.jar'
# Point to another classifier file to switch models; going by its name,
# this one is the English CoNLL 4-class classifier.
d2='C:/Users/usuario/Downloads/stanford-ner-2020-11-17/classifiers/english.conll.4class.distsim.crf.ser.gz'
PATH_TO_JAR = d1
PATH_TO_MODEL = d2
tagger = StanfordNERTagger(model_filename=PATH_TO_MODEL,
                           path_to_jar=PATH_TO_JAR, 
                           encoding='utf-8')
# Fetch the summary of the article, limited to 10 sentences.
result = wikipedia.summary('Albert Einstein', sentences=10)
# Split the summary into word/punctuation tokens.
words = nltk.word_tokenize(result) 
# Feed the tokens to the new tagger; the result is a list of (token, label) pairs.
tagged = tagger.tag(words)   
# Show what the tagger managed to label: print every (token, label) pair
# whose label is not 'O' (the label carried by tokens that are not entities).
# Iterates over the pairs directly instead of indexing with range(len(...)).
for entry in tagged:
    if entry[1] != 'O':
        print(entry)

('Albert', 'PERSON')
('Einstein', 'PERSON')
('German', 'MISC')
('German-born', 'MISC')
('Einstein', 'LOCATION')
('Nobel', 'MISC')
('Prize', 'MISC')
('Einstein', 'LOCATION')
('Einstein', 'PERSON')
('Brownian', 'MISC')
('Einstein', 'LOCATION')


import nltk
# Required download: fetches the 'averaged_perceptron_tagger' NLTK package
# (presumably the model behind nltk.pos_tag below; confirm against NLTK docs).
nltk.download('averaged_perceptron_tagger')

# Sample English text to tag.
sentence = """First up in London will be Riccardo Tisci, onetime Givenchy darling, 
favorite of Kardashian-Jenners everywhere, who returns to the catwalk with men’s 
and women’s wear after a year and a half away, this time to reimagine Burberry 
after the departure of Christopher Bailey."""  

# Split the text into word/punctuation tokens.
words = nltk.word_tokenize(sentence) 

# Part-of-speech tagger: yields a list of (token, POS tag) pairs.
tagged = nltk.pos_tag(words)
# Bare expression: presumably left as the last line so the notebook displays it.
tagged

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\usuario\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!

[('First', 'NNP'),
 ('up', 'RP'),
 ('in', 'IN'),
 ('London', 'NNP'),
 ('will', 'MD'),
 ('be', 'VB'),
 ('Riccardo', 'NNP'),
 ('Tisci', 'NNP'),
 (',', ','),
 ('onetime', 'RB'),
 ('Givenchy', 'NNP'),
 ('darling', 'NN'),
 (',', ','),
 ('favorite', 'NN'),
 ('of', 'IN'),
 ('Kardashian-Jenners', 'NNP'),
 ('everywhere', 'RB'),
 (',', ','),
 ('who', 'WP'),
 ('returns', 'VBZ'),
 ('to', 'TO'),
 ('the', 'DT'),
 ('catwalk', 'NN'),
 ('with', 'IN'),
 ('men', 'NNS'),
 ('’', 'VBP'),
 ('s', 'NN'),
 ('and', 'CC'),
 ('women', 'NNS'),
 ('’', 'VBP'),
 ('s', 'JJ'),
 ('wear', 'NN'),
 ('after', 'IN'),
 ('a', 'DT'),
 ('year', 'NN'),
 ('and', 'CC'),
 ('a', 'DT'),
 ('half', 'NN'),
 ('away', 'RB'),
 (',', ','),
 ('this', 'DT'),
 ('time', 'NN'),
 ('to', 'TO'),
 ('reimagine', 'VB'),
 ('Burberry', 'NNP'),
 ('after', 'IN'),
 ('the', 'DT'),
 ('departure', 'NN'),
 ('of', 'IN'),
 ('Christopher', 'NNP'),
 ('Bailey', 'NNP'),
 ('.', '.')]


# Fetch the NLTK packages downloaded before chunking, in the same order as
# before (presumably the resources nltk.ne_chunk relies on; confirm against
# the NLTK docs).
for resource in ('maxent_ne_chunker', 'words'):
    nltk.download(resource)

# Run NLTK's named-entity chunker over the (token, POS tag) pairs held in
# `tagged` and print the resulting tree.
tree = nltk.ne_chunk(tagged)
print(tree)

(S
  First/NNP
  up/RP
  in/IN
  (GPE London/NNP)
  will/MD
  be/VB
  (PERSON Riccardo/NNP Tisci/NNP)
  ,/,
  onetime/RB
  (GPE Givenchy/NNP)
  darling/NN
  ,/,
  favorite/NN
  of/IN
  Kardashian-Jenners/NNP
  everywhere/RB
  ,/,
  who/WP
  returns/VBZ
  to/TO
  the/DT
  catwalk/NN
  with/IN
  men/NNS
  ’/VBP
  s/NN
  and/CC
  women/NNS
  ’/VBP
  s/JJ
  wear/NN
  after/IN
  a/DT
  year/NN
  and/CC
  a/DT
  half/NN
  away/RB
  ,/,
  this/DT
  time/NN
  to/TO
  reimagine/VB
  (PERSON Burberry/NNP)
  after/IN
  the/DT
  departure/NN
  of/IN
  (PERSON Christopher/NNP Bailey/NNP)
  ./.)

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\usuario\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\usuario\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!

Procesamiento de lenguaje natural¶

Reconocimiento de Entidades Nombradas¶

NER Stanford ¶

NER NLTK ¶