My own Python #CotswoldWildLifeSafari

#AnalysingError: Resource universal_tagset not found.

Why does that happen?

I have the following code in my notebook/Python file.

However, when I executed this code on a brand-new VM with just a baseline Anaconda installation, I got the following error:

---------------------------------------------------------------------------
LookupError Traceback (most recent call last)
<ipython-input-2-03af1289e1af> in <module>
1 # reading the Treebank tagged sentences
----> 2 nltk_data = list(nltk.corpus.treebank.tagged_sents(tagset='universal'))

~\anaconda3\lib\site-packages\nltk\corpus\reader\util.py in __len__(self)
387 if len(self._offsets) <= len(self._pieces):
388 # Iterate to the end of the corpus.
--> 389 for tok in self.iterate_from(self._offsets[-1]):
390 pass
391

~\anaconda3\lib\site-packages\nltk\corpus\reader\util.py in iterate_from(self, start_tok)
410
411 # Get everything we can from this piece.
--> 412 for tok in piece.iterate_from(max(0, start_tok - offset)):
413 yield tok
414

~\anaconda3\lib\site-packages\nltk\corpus\reader\util.py in iterate_from(self, start_tok)
294 self._current_toknum = toknum
295 self._current_blocknum = block_index
--> 296 tokens = self.read_block(self._stream)
297 assert isinstance(tokens, (tuple, list, AbstractLazySequence)), (
298 "block reader %s() should return list or tuple."

~\anaconda3\lib\site-packages\nltk\corpus\reader\api.py in reader(stream)
419 def tagged_sents(self, fileids=None, tagset=None):
420 def reader(stream):
--> 421 return self._read_tagged_sent_block(stream, tagset)
422
423 return concat(

~\anaconda3\lib\site-packages\nltk\corpus\reader\api.py in _read_tagged_sent_block(self, stream, tagset)
470 def _read_tagged_sent_block(self, stream, tagset=None):
471 return list(
--> 472 filter(None, [self._tag(t, tagset) for t in self._read_block(stream)])
473 )
474

~\anaconda3\lib\site-packages\nltk\corpus\reader\api.py in <listcomp>(.0)
470 def _read_tagged_sent_block(self, stream, tagset=None):
471 return list(
--> 472 filter(None, [self._tag(t, tagset) for t in self._read_block(stream)])
473 )
474

~\anaconda3\lib\site-packages\nltk\corpus\reader\bracket_parse.py in _tag(self, t, tagset)
117 tagged_sent = [(w, p) for (p, w) in TAGWORD.findall(self._normalize(t))]
118 if tagset and tagset != self._tagset:
--> 119 tagged_sent = [
120 (w, map_tag(self._tagset, tagset, p)) for (w, p) in tagged_sent
121 ]

~\anaconda3\lib\site-packages\nltk\corpus\reader\bracket_parse.py in <listcomp>(.0)
118 if tagset and tagset != self._tagset:
119 tagged_sent = [
--> 120 (w, map_tag(self._tagset, tagset, p)) for (w, p) in tagged_sent
121 ]
122 return tagged_sent

~\anaconda3\lib\site-packages\nltk\tag\mapping.py in map_tag(source, target, source_tag)
134 source = "en-brown"
135
--> 136 return tagset_mapping(source, target)[source_tag]

~\anaconda3\lib\site-packages\nltk\tag\mapping.py in tagset_mapping(source, target)
89 if source not in _MAPPINGS or target not in _MAPPINGS[source]:
90 if target == "universal":
---> 91 _load_universal_map(source)
92 # Added the new Russian National Corpus mappings because the
93 # Russian model for nltk.pos_tag() uses it.

~\anaconda3\lib\site-packages\nltk\tag\mapping.py in _load_universal_map(fileid)
57
58 def _load_universal_map(fileid):
---> 59 contents = load(join(_UNIVERSAL_DATA, fileid + ".map"), format="text")
60
61 # When mapping to the Universal Tagset,

~\anaconda3\lib\site-packages\nltk\data.py in load(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)
750
751 # Load the resource.
--> 752 opened_resource = _open(resource_url)
753
754 if format == "raw":

~\anaconda3\lib\site-packages\nltk\data.py in _open(resource_url)
875
876 if protocol is None or protocol.lower() == "nltk":
--> 877 return find(path_, path + [""]).open()
878 elif protocol.lower() == "file":
879 # urllib might not use mode='rb', so handle this one ourselves:

~\anaconda3\lib\site-packages\nltk\data.py in find(resource_name, paths)
583 sep = "*" * 70
584 resource_not_found = "\n%s\n%s\n%s\n" % (sep, msg, sep)
--> 585 raise LookupError(resource_not_found)
586
587

LookupError:
**********************************************************************
Resource universal_tagset not found.
Please use the NLTK Downloader to obtain the resource:

Reason for the error

This happens because a fresh NLTK installation does not include its data packages (corpora, tagsets, models) due to their size; each resource, such as universal_tagset, has to be downloaded separately.

Resolution

There are a couple of ways to fix this issue.

Resolution 1: Just download what you need

Along with the error, you will notice that NLTK also suggests a resolution, which is to use the NLTK Downloader by executing the following line of code:

>>> nltk.download('universal_tagset')

You will notice that NLTK downloads the universal tagset and reports back to the console with the following lines:

[nltk_data] Downloading package universal_tagset to
[nltk_data] C:\Users\Development\AppData\Roaming\nltk_data...
[nltk_data] Unzipping taggers\universal_tagset.zip.

Resolution 2: Download everything within nltk

Or you can use the following line to download every NLTK data package at once:

nltk.download('all')

I used this link on stacktrace to understand this issue.

Cloud Professional, Azure Devops Certified, work for a Tech Major, never dull, sharpening my skills and sharing my learnings in the simplest form.