JL ivqxdZ ddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl m Z mZddlmZmZddlmZddlmZddlZGddZGd d ZGd d ZGd deZGddeZGddeZGddeZGddeZGddeZGddeZ GddeZ!GddeZ"Gdd eZ#Gd!d"eZ$Gd#d$eZ%Gd%d&eZ&Gd'd(Z'Gd)d*Z(Gd+d,Z)d-Z*d.Z+d/Z,d0Z-d\d1Z.d\d2Z/d3Z0d]d4Z1d5Z2d6Z3d7Z4d8Z5e'Z6e6jnZ7d9Z8d:Z9d;Z:e;de>jd>d?d@dAdBCe>jdDdEdFdGdHdIJe>jdKdLdMdGdHdNJe>jdOdPdQdGdHdRJe>jdSdTdUejjdVdWXe>j\ZCZDe'eCjYZFeDrZeDD]TZGeFjoeGeCjeCjeCjeCjZZLeLdHk(sGeCjsTyyeFjoeCjeCjeCjeCj[yy)^aH The NLTK corpus and module downloader. This module defines several interfaces which can be used to download corpora, models, and other data packages that can be used with NLTK. Downloading Packages ==================== If called with no arguments, ``download()`` will display an interactive interface which can be used to download and install new packages. If Tkinter is available, then a graphical interface will be shown, otherwise a simple text interface will be provided. Individual packages can be downloaded by calling the ``download()`` function with a single argument, giving the package identifier for the package that should be downloaded: >>> download('treebank') # doctest: +SKIP [nltk_data] Downloading package 'treebank'... [nltk_data] Unzipping corpora/treebank.zip. NLTK also provides a number of "package collections", consisting of a group of related packages. To download all packages in a colleciton, simply call ``download()`` with the collection's identifier: >>> download('all-corpora') # doctest: +SKIP [nltk_data] Downloading package 'abc'... [nltk_data] Unzipping corpora/abc.zip. [nltk_data] Downloading package 'alpino'... [nltk_data] Unzipping corpora/alpino.zip. ... [nltk_data] Downloading package 'words'... [nltk_data] Unzipping corpora/words.zip. Download Directory ================== By default, packages are installed in either a system-wide directory (if Python has sufficient access to write to it); or in the current user's home directory. However, the ``download_dir`` argument may be used to specify a different installation target, if desired. See ``Downloader.default_download_dir()`` for more a detailed description of how the default download directory is chosen. NLTK Download Server ==================== Before downloading any packages, the corpus and module downloader contacts the NLTK download server, to retrieve an index file describing the available packages. By default, this index file is loaded from ``https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml``. If necessary, it is possible to create a new ``Downloader`` object, specifying a different URL for the package index file. Usage:: python nltk/downloader.py [-d DATADIR] [-q] [-f] [-k] PACKAGE_IDS or:: python -m nltk.downloader [-d DATADIR] [-q] [-f] [-k] PACKAGE_IDS N)md5sha256) HTTPErrorURLError)urlopen) ElementTreecJeZdZdZ ddZedZdZdZy)Packagea4 A directory entry for a downloadable package. These entries are extracted from the XML index file that is downloaded by ``Downloader``. Each package consists of a single file; but if that file is a zip file, then it can be automatically decompressed when the package is installed. Nc ||_ |xs||_ ||_ ||_ t ||_ t ||_ ||_ ||_ | |_ | |_ | |_ | |_ tjj|j!ddd}tjj#|||z|_ t't | |_ |j*j-|y)N/)idnamesubdirurlintsize unzipped_sizechecksum svn_revision copyrightcontactlicenseauthorospathsplitextsplitjoinfilenameboolunzip__dict__update)selfrrrrrrrrrrrrr#kwexts U/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/nltk/downloader.py__init__zPackage.__init__s"3JB -  4EI : / ! 4(<"0  , 3 %ggsyy~b1215 VR#X6  3#e*%   R ct|trtj|}|jD]'}t|j||j|<)t di|jS)N) isinstancestrrparseattribr )xmlkeys r)fromxmlzPackage.fromxml s[ c3 ##C(C:: 3C!#**S/2CJJsO 3$$$r+c4|j|jkSNrr&others r)__lt__zPackage.__lt__ww!!r+c d|jzS)Nz r7r&s r)__repr__zPackage.__repr__s''r+) NNNNNUnknownr@r@r@T __name__ __module__ __qualname____doc__r* staticmethodr4r:r>r-r+r)r r sO C!J%%"(r+r c4eZdZdZddZedZdZdZy) Collectionz A directory entry for a collection of downloadable packages. These entries are extracted from the XML index file that is downloaded by ``Downloader``. Nc ||_ |xs||_ ||_ d|_ |jj |yr6)rrchildrenpackagesr$r%)r&rrJrr's r)r*zCollection.__init__sH6JB 0   ,  3 R r+cJt|trtj|}|jD]'}t|j||j|<)|j dDcgc]}|j d}}tdd|i|jScc}w)NitemrefrJr-)r.r/rr0r1findallgetrH)r2r3childrJs r)r4zCollection.fromxml1s c3 ##C(C:: 3C!#**S/2CJJsO 325++f2EFEIIe$FF:8:szz::Gs/B c4|j|jkSr6r7r8s r)r:zCollection.__lt__:r;r+c d|jzS)Nzr7r=s r)r>zCollection.__repr__=s 477**r+r6rAr-r+r)rHrHs* !$;;"+r+rHceZdZdZy)DownloaderMessagezSA status message object, used by ``incr_download`` to communicate its progress.N)rBrCrDrEr-r+r)rUrUFs!r+rUceZdZdZdZy)StartCollectionMessagez!r+rceZdZdZdZy)SelectDownloadDirMessagez:Indicates what download directory the data server is usingc||_yr6) download_dirr&rs r)r*z!SelectDownloadDirMessage.__init__s (r+Nr]r-r+r)rrs D)r+rcDeZdZdZdZ dZ dZ dZ dZ dZ d(d Z d)d Z d Z dZ dZdZdZd*dZdZdZdZddd d dd d ej,fdZd+dZd+dZd+dZd+dZdZd,dZd+dZdZd Z d!Z!d"Z"d#Z#e$e"e#Z%d$Z&d%Z'd&Z(e$e'e(Z)d'Z*y)- Downloaderzy A class used to access the NLTK data server, which can be used to download corpora and other data packages. izChttps://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml installedz not installedz out of datepartialNc|xs |j|_ i|_ i|_ ||_ d|_ d|_ i|_ d|_ |j|j|_yyr6) DEFAULT_URL_url _collections _packages _download_dir_index_index_timestamp _status_cache_errorsdefault_download_dir)r&server_index_urlrs r)r*zDownloader.__init__s$8(8(8 7E?)I @ $ L    R    %!%!:!:!FrK collectionsz%s:rr3T*-P ---------------------------K subsequent_indentz [{}] {} {}. zHit Enter to continue: )xqz([*] marks installed packagesz+; [-] marks out-of-date or corrupt packagesz+; [P] marks partially installed collections)L)rwidth)rprintlenrappend capitalizesortedgetattrr/status INSTALLEDSTALEPARTIAL NOT_INSTALLEDtextwrapfillrrformatljustrinputlower)r&r show_packagesshow_collectionsheader more_promptskip_installedlinesstaler categoriescategoryinforprefixr user_inputmsgs r)listzDownloader.listsY  --L 5 D E  #c$))n,- . /$)); < #c$))n,- . QJE    j )    m ," H %(--// 0 QJE6wtX68cB T<8T^^+TZZ' ET\\)"GNNCJJLL#&&     }} 4TWW5rX#n++FDGGMM"c4JDQRTZZ-..52:!&'@!AJ!'')Z7E/ 0 G7 8.  @ @C  @ @C hmmC#IBGHr+cV|j|jjSr6) _update_indexrvaluesr=s r)rKzDownloader.packages9s! ~~$$&&r+c|j|jjDcgc]\}}|jdk(s|c}}Scc}}wNcorporarritemsrr&rpkgs r)rzDownloader.corpora=> %)^^%9%9%;W SszzY?VWWW AAc|j|jjDcgc]\}}|jdk7s|c}}Scc}}wrrrs r)modelszDownloader.modelsArrcV|j|jjSr6)rrrr=s r)rzDownloader.collectionsEs#   ''))r+cHt|tr|j|S|Sr6)r.r/r)r& info_or_ids r) _info_or_idzDownloader._info_or_idMs! j# &99Z( ( r+c# K||j}t|t|ttfr|j |||Ed{y |j |}t|tr@t||j|j||Ed{t|y|j|||Ed{y7#ttf$r}tdd|d|Yd}~yd}~wwxYw7`79w)NzError loading : )rrr.rtuple_download_listrOSError ValueErrorrrHrW incr_downloadrJr__download_package)r&rrforceres r)rzDownloader.incr_download\s  --L*<8 8 j4- 0**:|UK K K  ##J/D dJ '(. .))$--uM M M)$/ /--dL%H H H% L $ t~j\A3%GH H   N IsZAD C DC"=DD (DD DC=C83D8C==DDcNt|tryt|jS)Nr)r.r rrK)r&rMs r) _num_packageszDownloader._num_packagesxs dG $t}}% %r+c#Ktt|D]} j||||<t fd|D}d}t|D]\}}t|trd|z } nt|j|z } j|||D]4} t| trt|| j| zz1| 6|d| zz }y#ttf$r}t |||Yd}~yd}~wwxYww)Nc3@K|]}j|ywr6)r).0rMr&s r) z,Downloader._download_list..sF4--d3Fsrg?d)rangerrrrrsum enumerater.r rKrrr) r&rrrir num_packagesrrMdeltars ` r)rzDownloader._download_list~ss5z" A ++E!H5a FFF  ' $GAt$(l*DMM*\9))$ eD c?3)(S\\E5I*IJJI   e #H $Z( "58Q// s.D CB,D D .D?D D  D c #Kt|td|j||}|s7||jk(r(t |tdt |y|j j|jdtjj||j}tjj|r1||jk(r t|tj |tj"|dtj"tjj||j$dt'|td t)|j*}t-|d5}t/d|j0dz }t3j4D]Q} |j7d} |j9| | sn+| d zdk(s2tt;d dd | |z zzSddd|j=tC|td |jjEdrtjj||j$} |jFsGtjjtjj| |jr8tI|tK|| dD] } || _&| tO|t |y#1swYxYw#t>$r5} tA|d |jd |j*d| Yd} ~ yd} ~ wwxYww)NrrT)exist_okwbr@PrzError downloading z from <>: .zipF)verbose)(rbrrrr{rjrpoprrrr r!existsrr~removemakedirsrrnrropenmaxr itertoolscountreadwritemincloserrrqendswithr#ru _unzip_iterrerx)r&rrrrfilepathinfileoutfile num_blocksblocksrzipdirrs r)rzDownloader._download_packages!$''a  T<04>>1!$' '!#& &&t, ,  tww-77<< dmm< 77>>( ##"4(( IIh  L40 BGGLLt{{;dK#4((a   TXX&Fh% V DII$;< &__.VE I.AMM!$qyA~-c"a" @R:S6S.TUU V V LLN$D))b!! == ! !& )WW\\, BggtxxQRS    sPE6M:9!L9AL,:"L,L94C8M:,L61L99 M7+M2-M:2M77M: [nltk_data] c  tjt| ||||_|j yd fd } |j |||D]} t | tr| | j|rt| j|ryd|_ |sW dtj} | dvr-|j| jj|||||sy| dvry|rt | t r-| d| j"jzd z  t | t$r[ dd |jr!| d | j"jz+| d | j"jzLt | t&r%| d | jjd|dt | t(r"| d| jjzdt | t*r"| d| jj,zdt | t.s| j0}y)N)fileTcTtj||z|zdzy)N )initial_indentr)rr)rprefix2rprint_tos r)showz!Downloader.download..shows/MM'-'7*07*:W*Dr+Fz(Error installing package. Retry? [n/y/e])yY)rEDownloading collection %rz | z$Downloaded collection %r with errorszDone downloading collection %szDownloading package z to z...z!Package %s is already up-to-date! z Unzipping %s.r?) functoolsrrr_interactive_downloadrr.rrrrrstripdownloadrerrWrZr_rbr{rur!rr) r&rrquietrr halt_on_errorraise_on_errorprint_error_tor%rchoicer$s ` @r)r0zDownloader.downloads$$U@  '%1"  & & ( ))*lEJ< 8c<0%%(55$$#'DL  !KL!&!Z/#'== #  , % % & - .$(-#z1#(!#'=>83>>;L;LLM') (#C)@A (!'<< F"%.."3"3!4 !!ACNNDUDU!UV$C)<="{{~~|=$C9@3;;>>QSWX$C):;_s{{/C/CCTJ$C)AB'*'7'7 y< 8zr+c@|j|||jk(Sr6)rrr&rrs r)is_stalezDownloader.is_stale9s{{:|4 BBr+c@|j|||jk(Sr6)rrr7s r) is_installedzDownloader.is_installed<s{{:|4FFr+cv||jjy|jj|dyr6)rclearrr&rs r)clear_status_cachezDownloader.clear_status_cache?s/ :    $ $ &    " "2t ,r+c| |j}|j|}t|tr|jDcgc]}|j |j }}|j|vr |jS|j|vr |jS|j|vr|j|vr |jS|j|vr |jS|jStjj||j}||jk7r|j||S|j |j vr)|j|||j |j <|j |j Scc}w)z Return a constant describing the status of the given package or collection. Status can be one of ``INSTALLED``, ``NOT_INSTALLED``, ``STALE``, or ``PARTIAL``. )rrr.rHrKrrrrrrrrr r! _pkg_statusr)r&rrrr pkg_statusrs r)rzDownloader.statusEsQ  --L + dJ '9=G#$++cff-GJGzzZ'zz!+||#:-$2D2D 2R||###z1)))~~%ww||L$--@Ht111''h7777$"4"44262B2B42RD&&tww/))$''22)Hs"F ctjj|s |jS tj|}|j t|jk7r |jSt||jk7r |jS|jdr|dd}tjj|s |jStjj|s |jStdtj |D}||j"k7r |jS|jS#t $r|jcYSwxYw)Nrr*c3K|]L\}}}|D]A}tjtjj||jCNywr6)rstatrr st_size)rd_filesfs r)rz)Downloader._pkg_status..sQ Aq%  Q*+33 3 sAA)rrr rrDrrErrr md5_hexdigestrrrisdirrwalkr)r&rrfilestatunzipdirrs r)r@zDownloader._pkg_statusgs"ww~~h'%% % &wwx(H   s499~ -::   "dmm 3::    V $}H77>>(+~~%77==*zz! #%778#4 M  2 22zz!~~7 &%% % &sE E$#E$c|j|jD]5}|j||jk(s"|j |||7y)zA Re-download any packages whose status is STALE. )r1rN)r>rKrrr0)r&r1rrs r)r%zDownloader.updatesL !==? ?C{{34::- cv > ?r+c|j1|/tj|jz |jkDsy|xs |j|_t j jtjt|jj|_tj|_|jjdDcgc]}tj|}}|Dcic]}|j|c}|_|jjdDcgc]}t"j|}}|Dcic]}|j|c}|_|j$j'D]}t)|j*D]\}}||j vr|j ||j*|<1||j$vr|j$||j*|<\t-dj/||j*|=|j$j'D]o}i}|g} | D]N} t1| t"r| j3| j*/t1| tr| || j<OP|j'|_q|j6j9ycc}wcc}wcc}wcc}w)zA helper function that ensures that self._index is up-to-date. If the index is older than self.INDEX_TIMEOUT, then download it again.Npackages/packagecollections/collectionz.removing collection member with no package: {})rtimer INDEX_TIMEOUTrnltk internalsElementWrapperrr0rgetrootrOr r4rrrHrrrrJrrr.extendrKrr<) r&rprKcrrZrchild_idqueuerQs r)rzDownloader._update_indexs KK yy{T222T5G5GG $499 nn33   gdii0 1 9 9 ;  !% 15 0C0CDV0WX1GOOA&XX+34a!$$'4,0;;+>+>?W+X &'J  q !  /::QTT1W:++224 /J()<)<= / 8t~~--1^^H-EJ''*!2!22-1->->x-HJ''*HOO$ #++A. / /++224 4JHLE eZ0LL0w/).HUXX&  #+//"3J  4   "MY4 ;s!KK<K$K)c:|j|jS)z Return the XML index describing the packages available from the data server. If necessary, this index will be downloaded from the data server. )rrr=s r)indexzDownloader.indexs {{r+c|j||jvr|j|S||jvr|j|Std|z)zKReturn the ``Package`` or ``Collection`` record for the given item.Package %r not found in index)rrrrr=s r)rzDownloader.infos[   >>"% % "" "$$R( (82=>>r+c|j|jjdD]}|jd|k(s|cS|jjdD]}|jd|k(s|cSt d|z)z-Return the XML info record for the given itemrQrrRra)rrrOrPr)r&rrerZs r)xmlinfozDownloader.xmlinfos {{**+=> G{{4 B& ++--.FG "J~~d#r)!! "82=>>r+c|jS)z)The URL for the data server's index file.)rr=s r)_get_urlzDownloader._get_urls yyr+cZ|j} |j|y#||_xYw)z Set a new URL for the data server. If we're unable to contact the given url, then the original url is kept. N)rr)r&r original_urls r)_set_urlzDownloader._set_urls1 yy     s # $DI s *cdtjvrytjjD]E}tjj |s#tj j|sC|cStjdk(r&dtjvrtjd}n/tjjd}|dk(r tdtjj|dS)a Return the directory to which packages will be downloaded by default. This value can be overridden using the constructor, or on a case-by-case basis using the ``download_dir`` argument when calling ``download()``. On Windows, the default download directory is ``PYTHONHOME/lib/nltk``, where *PYTHONHOME* is the directory containing Python, e.g. ``C:\Python25``. On all other platforms, the default directory is the first of the following which exists or which can be created with write permission: ``/usr/share/nltk_data``, ``/usr/local/share/nltk_data``, ``/usr/lib/nltk_data``, ``/usr/local/lib/nltk_data``, ``~/nltk_data``. APPENGINE_RUNTIMENwin32APPDATAz~/z+Could not find a default download directory nltk_data) renvironrUdatarr rV is_writablesysplatform expanduserrr )r&nltkdirhomedirs r)rzDownloader.default_download_dir s" "** , yy~~ Gww~~g&4>>+E+Eg+N  <<7 "yBJJ'>jj+Ggg((.G$ !NOOww||G[11r+c|jS)a The default directory to which packages will be downloaded. This defaults to the value returned by ``default_download_dir()``. To override this default on a case-by-case basis, use the ``download_dir`` argument when calling ``download()``. )rr=s r)_get_download_dirzDownloader._get_download_dir1s!!!r+cF||_|jjyr6)rrr<rs r)_set_download_dirzDownloader._set_download_dir:s)   "r+ctjjddjdk(rt |j y ddl} t|jy#t$rt |j YywxYw#|j$rt |j YywxYw)N'NLTK_DOWNLOADER_FORCE_INTERACTIVE_SHELLfalsetruer) rrnrPrDownloaderShellruntkinter ImportError DownloaderGUImainloopTclError)r&rs r)r.z Downloader._interactive_downloadEs JJNNDg N T T V  D ! % % '     ( $  ( ( *   D ! % % '    ( D ! % % ' (s$ A,B,"BB(B?>B?)NN)NTTTFF)NFr6)Fr)+rBrCrDrErTrrrrrr*rrKrrrrrrrrrqstderrr0r8r:r>rr@r%rr_rrcrerhpropertyrrrwryrr.r-r+r)rrs7MXK!I$#M E"GE=N:Ix'XX*I8& $2@)Hzz^@CG- 3D"H?<#|? ?  8X &C%2N"# -/@AL (r+rc<eZdZdZdZdZdZdZdZdZ dZ y ) r~c||_yr6)_ds)r& dataservers r)r*zDownloaderShell.__init__^s r+ctddtd|Dz t|dz zdz}td|j|ztdy)NzK---------------------------------------------------------------------------Dc32K|]}t|ywr6)r)ros r)rz;DownloaderShell._simple_interactive_menu..cs01A0srrr!)rrrr )r&optionsspcs r)_simple_interactive_menuz(DownloaderShell._simple_interactive_menuasN hC0000c'lQ6FG#M fsxx(() hr+c td |jddddddtd j}|s t=|j j d }|j d d} |d k(r=t|j j|j jd dnl|dk(r|jnV|dk(r|jn@|dvry|dk(r|j|n$|dk(r|jntd|zt+#t$r}td|zYd}~(d}~wt$r"}td|jzYd}~Qd}~wwxYw)NNLTK DownloaderTz d) Downloadzl) Listz u) Updatez c) Configzh) Helpzq) Quitz Downloader> rrlF)rrhr[)rrrFuzCommand %r unrecognizedzError reading from server: %szError connecting to server: %s)rrrr/rrrrr_simple_interactive_help_simple_interactive_config_simple_interactive_download_simple_interactive_updaterrreason)r&rcommandargsrs r)rzDownloaderShell.rungsi    ) )  ~.446J &&(..03G##%ab)D Cc>GHHMM$(("7"7SWMX^113^335 *^55d;^3353j@A GK> ;59:: C6ABB Cs+>A2D81;D88 FE F E==FcR|r&|D] } |jj|d"y t t dt d}|j dk(r4|jj|jjdddh|j d vry|r4|jD] } |jj|d"y#ttf$r}t |Yd}~d}~wwxYw#ttf$r}t |Yd}~kd}~wwxYw) Nr!rTz*Download which package (l=list; x=cancel)? Identifier> rF)rrrrrr?) rr0rrrrrrrr)r&rargrrrs r)rz,DownloaderShell._simple_interactive_downloads#  HH%%c&%9  BC"#34 ##%,HHMM--$$('+ " %%'>9(..0%% HH--b-@% ) ,!HH*!(4%!!HH%s/C:DC?* C::C?D& D!!D&c ( g}dx}}tt|jdtD]\}|jj ||jj k(s6|j |j|jf^t|rtd|D]N\}}tjd|zddd d}td j|jd d |Pttd }|jdk(r)|D]#\}} |jj!|d%y|jdvr ytdyk#t"t$f$r} t| Yd} ~ gd} ~ wwxYw)NFrKrz/Will update following packages (o=ok; x=cancel)rrrrrz [ ] {} {}rrrrr!rrzNothing to update.)rrrr/rrrrrrrrrrrrr0rr) r&stale_packagesrrrpidpnamerrrs r)rz*DownloaderShell._simple_interactive_updatesN# #EGCH @88??4(DHHNN:"))477DII*>? @ GGH"0JJC#== E*B(cD-..syyS/A4HI J "#34 ##%,&4% U% HH--c&-A% %%'>9*+;,!(4%!!HH%s,E--F< F  Fcpttdtdtdtdy)Nz Commands:zH d) Download a package or collection u) Update out of date packagesz1 l) List packages & collections h) Helpz1 c) View & Modify Configuration q) Quit)rr=s r)rz(DownloaderShell._simple_interactive_helps.  k V  AB ABr+cttdtd|jjztdt|jj ztdt|jj zttdtd|jj zy)Nz Data Server:z - URL: <%s>z$ - %d Package Collections Availablez$ - %d Individual Packages AvailablezLocal Machine:z - Data directory: %s)rrrrrrKrr=s r) _show_configzDownloaderShell._show_configs  n o ,- 4s488;O;O;Q7RRS 4s488;L;L;N7OOP   &)>)>>?r+c|j t|jddddtdj j }|dk(r|jn|dk(ritdj }|d vr td nt jj|r||j_ nltd |zn]|d k(rRtd j }|d vr td n/|jdsd|z} ||j_ n|dk(ry(#t$r}td|d|Yd}~!d}~wwxYw)Nzs) Show Configzu) Set Server URLzd) Set Data Dirz m) Main MenuzConfig> rrFz New Directory> )r?rrXQz Cancelled!z)Directory %r not found! Create it first.rz New URL> )zhttp://https://rzError reading d<Z?d=Z@Gd>d?eAjZCd@ZDdAZEy)DrzU Graphical interface for downloading packages from the NLTK data server. ) r? IdentifierNameSizeStatus Unzipped Size CopyrightContactLicenseAuthorSubdirChecksumrr)r?rrrrr- )r?rrrrr)r?rrrr)z#000z#ccc)z#afaz#080)z#ffaz#880)z#faaz#800)#fffz#888)rz#45c)#aaaz#67a)z#f00rzhelvetica -16 boldcddl}ddlm}||_||_t j |_g|_g|_ d|_ i|_ g|_ d|_ |jd|jx}|_|j#d|j%d|j'|j(d|j+d |j,|j+d |j,d|_i|_|j3|j5 |j7|j?|jA|jBjEd|jBj+d |jFy#t8$r}|d |Yd}~yd}~wt:$r}|d |j<Yd}~d}~wwxYw)Nr showerrorFzNLTK Downloader Started!z+50+50rr) backgroundz z Error reading from serverError connecting to serverz )$rtkinter.messageboxrr _use_threads threadingLock_download_lock_download_msg_queue_download_abort_queue _downloading_afterid _log_messages _log_indent_logTktopgeometrytitle configure_BACKDROP_COLORbinddestroy _destroyed _column_vars _init_widgets _init_menu _fill_tablerrr _show_info_select_columns_tableselect_destroy)r&r use_threadsrrrrs r)r*zDownloaderGUI.__init__\s 0'(nn.#% %'"!   ,-!%dh X #$ !5!5a!8 9  -  -   >       1 dmm4 6 11 5 5 > 2AHH = = >s$,F G' F55 GGGc|jjdjtjd|j z|y)Nz{} {}{}z | )rrrrSctimerr&rs r)rzDownloaderGUI._logs9 !!   TZZ\543C3C+CS I r+c* ddl}ddlm}|jjdddd}|j ddd |j dd |jdd |j|d jdd|j|}|jdd d|j|}|jddd|j|}|jddd|j|d jdd|j|}|jddd|j|d jdd|jjddjd }|j dddjd<djd<gd_ i_ tjD]x\} } |j|| j} | j d| d zdzdz| jd j | j| j#<zj$D cgc]} j&j)| d  } } ||j$| dd!j*"_j,j/dj0d#tj$D]H\} } j2j)| j4}j,j/| |$Jj,j dd %j,j7j,j9d&j:j,jd'j<j,jd(j:j,jd)j>j,jd*j@j,jd+jB|jd d d,d-jDfd.d/jFfg}i_$t|D]\} \}} }|j|| 0jd| d1|jK|d2d3d4d45}||fjH|<|jd(jL|jd |ffd6 |jd | d7jjd jL|jO|d8j:d9_(jPj d:|jO|d;jRd9_*jTj d<:|j|d=jdjd >_+|jY|d?d!jZd d@d A_.j_j\j d<:jVj d:ycc} w)BNr)Tableraisedr)reliefborderpadxpadyrTboth)sidexpandrr)weight)height)columnrownews)rrstickyrr)rrrbottomr)siderrhighlightthickness) CollectionsCorporaModelsz All Packages)textfontleftr)rrz )column_weightsrlistbox_heightreprfunc) foregroundr)rrzzzzzz rz Server Index:rzDownload Directory:)rrcouriergroovez#007aff)rrdisabledforegroundr c&j|Sr6 _info_edit)rr3r&s r)z-DownloaderGUI._init_widgets..s8Lr+ewDownload)rrr)rRefreshrightr?)rr rsunken)rrrrr)0rnltk.draw.tablerFramerpackgrid_rowconfiguregrid_columnconfiguregridr _tab_names_tabsrLabel _TAB_FONTr _select_tabrCOLUMNSCOLUMN_WEIGHTSrP_table_reprfuncr columnconfig _MARK_COLOR COLUMN_WIDTHSDEFAULT_COLUMN_WIDTHfocusbind_to_listboxes _download _table_mark _prev_tab _next_tab _mark_allrhry_infoEntry _info_saveButton_download_button_refresh_refresh_button_progresslabelCanvas_PROGRESS_COLOR _progressbar_init_progressbar)r&rrf1tabframe tableframe buttonframe infoframe progressframertablabelrr rrr3callbackentrys` r)rzDownloaderGUI._init_widgetss )]]488HQQQ] O E$V4 Qq) !, b #((q(9==$ QAf 5]]2& qa7mmB' q8 b #((q(9MM"% aQv6 b #((q(9 HH111E1Ea1H&  s3)*%&O 0 ,FAsMM(4>>MJE JJF1q5A+);J < JJ|T%5%5 6&+DJJsyy{ #  ,LP<<X$--11&!<XX  LL) ))      t/?/?/B C"4<<0 5IAv&&**643L3LME KK $ $Qe $ 4 5 62  %%&94>>J D$4$45 T^^4 4>>2 DNN3 7 &&q&3OT]] 3 2D4J4J K  )24 5 %A%UH MM)%M 0 5 5QAc 5 RMM#,$ "E %h/DJJsO JJz4?? 3 JJ|3%L M JJaQtJ 4 5  lDOO4!( j$..!/!  """/&~~ ia .  !!w!/&mm ++A.++A. ,  $NN ++A. +    G,   f -WYs1#Zcddl}|jj}|j|d}|jddjd|j |jddfd |jd dfd  |j |jd d j |j |jddjd|jdd||j|d}jjddD]z}|jj}|jvsJ|j|<|jvr|jd|j|d|j ||jdd||j|d}jjddD]}|jd|z|ffd |j jjddD]}|jd|z|ffd |jdd||j|d}|jddj" |jddj$d|jdd|jj'd j$jj)|!y)"Nr)tearoffrReturn)rG underliner acceleratorzChange Server Indexc&jdS)Nrrr=sr)rz*DownloaderGUI._init_menu..sDOOE2r+)rGrMrzChange Download Directoryc&jdS)Nrrr=sr)rz*DownloaderGUI._init_menu.."sDOON;r+zShow LogrExitrzCtrl-xFile)rGrMmenur)rGrMvariablerViewz Sort by %sc<jj|dS)N ascendingrsort_byr[r&s r)rz*DownloaderGUI._init_menu..Bs$++*=*=a*Mr+)rGrzReverse sort by %sc<jj|dS)N descendingrYr[s r)rz*DownloaderGUI._init_menu..Is$++*=*=a*Nr+SortAbout InstructionsF1Helpz)rT)rMenur add_commandr/ add_separator _show_logr add_cascader column_namesIntVarrINITIAL_COLUMNSsetadd_checkbuttonrabouthelprconfig) r&rmenubarfilemenuviewmenurvarsortmenuhelpmenus ` r)rzDownloaderGUI._init_menus,,txx(<<<34>>x    '2  -;   :DNNS At||   &AHE <<<3kk..qr2 F..*C!2!22 22(+D  f %---  $ $CAUAU %    &AHE <<<3kk..qr2 F  "V+"(M !    kk..qr2 F  *V3"(N !   &AHE<<<37aL Atyyd   &AHE  fdii( W%r+c|jjD]L\}}|jr|jj |2|jj |Nyr6)rrrPr show_column hide_column)r&rrss r)rzDownloaderGUI._select_columnsWsQ,,224 0KFCwwy ''/ ''/  0r+c"ddlm}|jj |j |jjdy#t $r}|d|Yd}~3d}~wt $r}|d|jYd}~Wd}~wwxYwNrrrr) rrrr>rrrrrr)r&rrs r)r9zDownloaderGUI._refresh^su0 ##% >     1  6 11 5 5 > 2AHH = = >s#A B A%% B1B  Bc||j|j|\}}d|d<d|d<|jy)Nnormalstaterr)r6r4r-)r&info_keyrIrHs r)rzDownloaderGUI._info_editjs:  JJx0!g"h r+Nc|j}|jjD]P\}}|ddk(r| |j|ur|jdk7r|}1d|d<d|d<||j R|j y)Nr}disabledrLrr)rr4rwidgetkeysymrPr-)r&rr-rIrHs r)r6zDownloaderGUI._info_saveqs #zz002 &OE8W~+}U!2qxx87K!+g"*h% &  r+c|jj|jdr7t|trd|zS|dkrd|dz zS|dkrd|dz zSd |d z zS|d vr t |Sd|zS) Nrz %siz %.1f KBg@i@z %.1f MBg0Az %.1f GBgA)rr?)rrhrr.r/)r&rcolvals r)r(zDownloaderGUI._table_reprfunc~s ;; # #C ( 1 1& 9#s#|#w"cIo66w"cIo66"cIo66 '>s8OC< r+cddlm}||jjk(ry ||j_|j |jy#t $r}|dt |Yd}~1d}~wwxYw)NrrzError Setting Server Index)rrrrrrr/r)r&rrrs r)rhzDownloaderGUI._set_urlsd0 $((,,   <DHHL      < 2CF ; ;       6 11 5 5 > 2AHH = = >s#A B A++ B7BBctd|jj|jj D]\}}d|d<|j dd|jddj d|jj|jddj d|jj|jj D] \}}d|d< y) Nz showing infor|r}rendrrr)rrrr4rdeleteinsertr)r&rIcbs r)rzDownloaderGUI._show_infos ndhhll+**, #IE2%E'N LLE " # 5!##Atxx||4 >"1%,,Q0E0EF**, (IE2'E'N (r+cddlm}t|jD]`\}}|j |j k(s$|dkDs*|j|dz j |_ |j cSy#t$r}|d|Yd}~{d}~wt$r}|d|jYd}~d}~wwxYwNrrrrr) rrrr!r_tabrrrrr&rrrrFs r)r1zDownloaderGUI._prev_tabs00 FFAsyy{dii'AE OOAE288: F++--  F !>91==F:AHHEEFs$,B C B C#B;;Ccddlm}t|jD]v\}}|j |j k(s$|t |jdz ks@|j|dzj |_ |jcSy#t$r}|d|Yd}~d}~wt$r}|d|jYd}~d}~wwxYwr) rrrr!rrrr"rrrrrs r)r2zDownloaderGUI._next_tabs00 FFAsyy{dii'ATZZ11D,E OOAE288: F++--  F !>91==F:AHHEEFs$B C B-- C9CCcddlm}|jdj|_ |j y#t $r}|d|Yd}~yd}~wt$r}|d|jYd}~yd}~wwxYw)Nrrrrr) rrrrrrrrr)r&eventrrs r)r%zDownloaderGUI._select_tabsh0LL(..0  >     6 11 5 5 > 2AHH = = >s!; A; A A;A66A;rc|jj}|jj|jdk(r|jj }n|jdk(r|jj }nh|jdk(r|jj}n>|jdk(r|jj}nJd|jz|Dcgc]}|j|}}|jj||jjD]o\}}||jk(r.|j|jd|jdC|j|jd|jdq|jj!d d |j#|jj%||j&j(d |jj*j,g|jj.j1|j&j(d |jj*j,g|jj.j1ycc}w)Nz all packagesrrrrzbad tab value %rrr rrrX)orderi,)r selected_rowr<rrrKrrr_package_to_columnsrYr"rr_FRONT_TAB_COLOR_BACK_TAB_COLORrZ _color_tablerrafter _scrollbarrk_mlbyview)r&rrrMrowsrFrGs r)rzDownloaderGUI._fill_tables{{//1   99 &HH%%'E YY) #HH$$&E YY( "HHOO%E YY- 'HH((*E 4(4994 41;@A4((.AA 4 ****, JCdii#44Q7#44Q7  #33A6#33A6   L <  <( sDKK2266R9I9I9O9O9QRsDKK2266R9I9I9O9O9QR5Bs5J;ctt|jD]=}|jj |j|df}||j|df<?|j y)Nrr)rrrrrr)r&row_numrs r)_update_table_statusz"DownloaderGUI._update_table_statuss`S-. 4GXX__T[[,1F%GHF-3DKK) * 4 r+c|jr|j|Stt|jDcgc]'}|j|dfdk7r|j|df)}}|jj }|s||j|dfg}|j j||j j}d|_ |j||ycc}w)Nrr?r) r_download_threadedrrrrrrrr _download_cb)r&rrmarked selection download_iters r)r/zDownloaderGUI._download s   *4**A. .S-. {{36"b( KK\) *  KK,,. )/kk)\"9:;F..vtxx7L7LM  -0 s,C$c t|}fd}t|trj |jn t|trU||j|j%j|jjj dyt|tr6|d|j jzxj"dz c_n`t|t$r!|d|jjzn/t|t&r |d|jjznt|t(r |d |jjznt|t*r |d |jj,znt|t.rZxj"dzc_|d |j jzj1|j jn5t|t2r%j1|jjjj j4j6||}|j d<y#t$rIjjj dj d}|j d<YywxYw) NrrrcF|jd<j|yNrr;rrr&s r)r%z(DownloaderGUI._download_cb..show,*+D   ' IIaLr+zDownloading collection %srzDownloading package %sPackage %s is up-to-date!Finished downloading %r. Unzipping %s#Finished downloading collection %r.)next StopIterationrrr_show_progressrr.rrrrre_selectrrWrZrrbr{rqrur!r_ _clear_markrj _DL_DELAYr)r&ridsrafteridr%s` r)rzDownloaderGUI._download_cb"s }%C  c? +    - \ *  {{& S[[^^,    %  3 4 ,s~~/@/@@ A    !  0 1 )CKKNN: ; _ - ,s{{~~= >2 3 +ckknn< = . / #++"6"66 7 4 5    !  69J9JJ K   S^^.. / 1 2   S[[^^ ,((..1B1BMSVW(/ n%O   % % 'hhnnR)<){{9%a(B.,. IqL),/ IqL) #r+cnddlm}dj|j}||jd|y)NrShowTextrzNLTK Downloader Log)nltk.draw.utilrr rr)r&rrs r)rfzDownloaderGUI._show_logws*+yy++,0$7r+cg}t|jD]\}}|dk(r|jd|dk(r|j|j>|dk(r+|j|jj |n|j jdd}|jt||d|S)z Given a package, return a list of values describing that package, one for each column in ``self.COLUMNS``. rr?rrrrGzn/a) rr&rrrrrreplacer)r&rr column_index column_nameattrs r)rz!DownloaderGUI._package_to_columns}s )24<<)@ 6 %L+q  2 , 366"( 488??3/0"((*223< 73e45 6 r+c`|jry|jjd|_y)NT)rrr)r&rs r)rzDownloaderGUI.destroys# ??  r+c|j:|jjD]}|jj||jr|j r|j |jjyr6) rrr after_cancelrr_abort_downloadrr<)r&rrs r)rzDownloaderGUI._destroysm 88 ==//1 /%%g. /   !2!2  " !r+c<|jj|i|yr6)rr)r&rkwargss r)rzDownloaderGUI.mainloops4*6*r+an This tool can be used to download a variety of corpora and models that can be used with NLTK. Each corpus or model is distributed in a single zip file, known as a "package file." You can download packages individually, or you can download pre-defined collections of packages. When you download a package, it will be saved to the "download directory." A default download directory is chosen when you run the downloader; but you may also select a different download directory. On Windows, the default download directory is "package." The NLTK downloader can be used to download a variety of corpora, models, and other data packages. Keyboard shortcuts:: [return] Download [up] Select previous package [down] Select next package [left] Select previous tab [right] Select next tab cddlm} ||jd|jj ddy#||jd|jj dYyxYw)NrrzHelp: NLTK Downloaderrfixed)rrr )rrrHELPr/)r&rrs r)rnzDownloaderGUI.helpsX+ U ' !   U TXX6 8IQS Ts /80A*cddlm}d}d} ddlm}|||j y#t $r||j ||YywxYw)Nrrz'NLTK Downloader Written by Edward LoperzAbout: NLTK Downloader)Message)rr)rrrrr%rr)r&rrABOUTTITLErs r)rmzDownloaderGUI.aboutsI+?( - 2 E / 4 4 6 - TXXue , -s+A  A c|j}t|dt|d}}tdt|ddz|jzD]a}|j ||jzdzd||jz|z dz |dz|jddt |d zd z d zzz c|j d |jd d|jd|jdddd|jdy)Nrrrrriz #%02x0000rrrr)rrgradienthiddenr}redbox)r) r>rr_gradient_width create_lineabs addtag_all itemconfigaddtag_withtagcreate_rectangler=)r&r[rrrs r)r?zDownloaderGUI._init_progressbars   AgJQx[)9vq3qz?Q.43G3GGH A MMD(((2-D(((61B6 ** BQUQY")<$<=    Z  Zx 0  a((Aq!$:N:Nq:Q(R r+c|j}|)|jddddd|jddyt|dt|d}}|t|zdzd z}|jddd||d zy) Nrrrrrrrrr)r>coordsrr)r&percentr[rrrs r)rzDownloaderGUI._show_progresss    ? HHXq!Q * LL8L 4' OS8-=6E#e*$+a/A HHXq!Q 3r+c|j}|js|jddy|jdd|jd\}}}}|dkr$|j d|j dzdz dn|j dd d|j jd |j}||jd <y) Nrrrr|irrrr*r_progress_alive) r>rrbboxmoverrrrr)r&r[x1y1x2y2rs r)rzDownloaderGUI._progress_alive s     LL8L 4 LL8L 4VVJ/NBBTzzD$8$81$<#A1Ez2q)hhnnS$*>*>?G/6DMM+ ,r+c|jr|jyd|jd<tt |j Dcgc]'}|j |dfdk7r|j |df)}}|j j }|s||j |dfg}t|jj|jj}|jgk(sJ|jgk(sJ|j|||j|j|jjd|_d|_|j#|j%ycc}w)NCancelrrr?rT)rrr8rrrrrrrrrr_DownloadThreadrstartr_monitor_message_queuer)r&rrrrdss r)rz DownloaderGUI._download_threadedsd     " )1f%S-. {{36"b( KK\) *  KK,,. )/kk)\"9:;F  dhh&;&; <''2---))R///        $ $  & &  %'  ##% A s ,E1c|jrP|jj|jj d|jj yy)Nabort)rracquirerrreleaser=s r)rzDownloaderGUI._abort_downloadHsH       ' ' )  & & - -g 6    ' ' ) r+ceZdZdZdZy)DownloaderGUI._DownloadThreadc||_||_||_||_||_t j j|yr6) data_serverrlock message_queuerrThreadr*)r&r rr r rs r)r*z&DownloaderGUI._DownloadThread.__init__Os;*D DJDI!.D DJ    % %d +r+c|jj|jD]}|jj |j j ||jr7|j j d|jjy|jj|jj |j j d|jjy)Nabortedfinished) r rrr rr rrrrs r)rz!DownloaderGUI._DownloadThread.runWs''55djjA $ !!#""))#.::&&--i8II%%' !!# $ II       % %j 1 II   r+N)rBrCrDr*rr-r+r)rrNs  , r+rrc\fd}jjsyjD]x}|dk(s|dk(rjd_dj d<jdd=j dd=jj|dk(r|djdyjjdjd}|jd <yt|trj|jt|tr]||j |j"%j%|j"j&jdd_yt|t(r6|d |j*j&zxj,d z c_t|t.rPj0j3|j"j&|d |j"j&zt|t4r!|d |j"j&z4t|t6r!|d|j"j&zet|t8r!|d|j"j:zt|t<r!|d|j"j&zt|t>r[xj,d zc_|d|j*j&zjA|j*j&2t|tBsDjjA|j"j&{j rdjDd<jdd=jjjjjFjH}|jd <y)NcF|jd<j|yrrrs r)r%z2DownloaderGUI._monitor_message_queue..showhrr+rrFrrzDownload aborted!rrr)rzDownloading package %rrrrzFinished installing %srzAborting download...)%rrrrrr8rrrrrrr.rrrrrerrrWrZrrbrr>r{rqrur!rxr_rrjr;_MONITOR_QUEUE_DELAYr)r&r%rrs` r)rz$DownloaderGUI._monitor_message_queuegs2  ""**, ++3 1Cj C9$4))+$)!0:%%f-,,Q/..q1##++-)#,-''-#hhnnS$2E2EtLG>EDMM":;C1##CLL1C.S[[!;;*LL0##D)$)!C!7803>>3D3DDE  A% C!45++CKKNN;- >?C103;;>>ABC!67/#++..@AC!23^ckk&:&::;C!34- >?C!89  A% :S^^=N=NNO  !2!23C!56))+  0g3 1p  % %*@D   '  $ $Q ' ##%((..!:!:D D E'SR 1&I*0X ") &$8 , "+ 8??  D: U - O (4 7$*X*  )** .I:r+rct|tr!t|d5}t|cdddSt|S#1swYt|SxYw)zz Calculate and return the MD5 checksum for a given file. ``file`` may either be a filename or an open stream. rbN)r.r/r _md5_hexdigestrrs r)rJrJsM $ $  *!&) * * $  * $  =Act} |jd}|s |jS|j|6Nr)rrr% hexdigest)fp md5_digestrs r)rrsDJ  "     !! % r+ct|tr!t|d5}t|cdddSt|S#1swYt|SxYw)z~ Calculate and return the SHA-256 checksum for a given file. ``file`` may either be a filename or an open stream. rN)r.r/r _sha256_hexdigestrs r)sha256_hexdigestr"sM $ $  -$V, - - T "" - T ""rct} |jd}|s |jS|j|6r)rrr%r)r sha256_digestrs r)r!r!sDHM  "   " " $$ U# r+c^t|||D]}t|tst|y)z\ Extract the contents of the zip file ``filename`` into the directory ``root``. N)rr.rr)r!rootrrs r)r#r#s1 xw7% g| ,G$ $%r+c#K|r`tjjdtjj |dztjj  tj|}|j||r tyy#tj$rt|dYyt$r}t||Yd}~yd}~wwxYww)NrrzError with downloaded zip file)rqstdoutrrrrflushzipfileZipFile BadZipFilerr extractallr)r!r&rzfrs r)rrs "''--*A!*DDE  __X &MM$    8%EFF 8Q''s<A#C &B;C !C;C =CCC CC chg}ttjj|dD]D\}}}tj|j }|d|dtjj |j d}td|jD}|jdd|z|jdd|jz|jddt|j z|jd dt|j z|jd ||jd s|jd ||j|Gtt!tjj|d } t} || zD]R} | jd | vrt#d| jd z| j%| jd Tt'j(d} | jt'j(d| dj+t-|d| jt'j(d | dj+t-| dt/| | S)a} Create a new data.xml index file, by combining the xml description files for various packages and collections. ``root`` should be the path to a directory containing the package xml and zip files; and the collection xml files. The ``root`` directory is expected to have the following subdirectories:: root/ packages/ .................. subdirectory for packages corpora/ ................. zip & xml files for corpora grammars/ ................ zip & xml files for grammars taggers/ ................. zip & xml files for taggers tokenizers/ .............. zip & xml files for tokenizers etc. collections/ ............... xml files for collections For each package, there should be two files: ``package.zip`` (where *package* is the package name) which contains the package itself as a compressed zip file; and ``package.xml``, which is an xml description of the package. The zipfile ``package.zip`` should expand to a single subdirectory named ``package/``. The base filename ``package`` must match the identifier given in the package's xml file. For each collection, there should be a single file ``collection.zip`` describing the collection, where *collection* is the name of the collection. All identifiers (for both packages and collections) must be unique. rKr rc34K|]}|jywr6) file_size)rzf_infos r)rzbuild_index..- sK'G--Ksrz%srrsha256_checksumrrrrzDuplicate UID: %srmrc$|jdSNrrPrds r)rzbuild_index..I s7;;t;Lr+rc$|jdSr5r6rYs r)rzbuild_index..K sPTAUr+)_find_packagesrrr rDr!rrinfolistrkrErJr"rPrr_find_collectionsraddrElementrYr _indent_xml) r&base_urlrKpkg_xmlr.rzipstatrrruidsrMtop_elts r) build_indexrC s >H-bggll4.LM!V''"++& !F81RWW]]2;;%?%B$CDKR[[]KK   OTM%9: FD7??23 J}R[['A AB %t.>r{{.K'KL Hf%{{5! KKs #  !!&(dM)JKLK 5D;&! 88D>T !0488D>AB B $ ! !!+.G NN;&&z23 AJfX+LMN NN;&&}56 AJf[.UVW Nr+cpt|dkDr|jxsdjdz|zdz|_|D]}t||dz|ddD].}|jxsdjdz|zdz|_0|djxsdjdz|z|d_yy)z Helper for ``build_index()``: Given an XML ``ElementTree``, modify it (and its descendents) ``text`` and ``tail`` attributes to generate an indented tree, where each nested element is indented by 2 spaces with respect to its parent. rr?rr+Nr )rrr/r=tail)r2rrQs r)r=r=Q s 3x!|HHN))+d2V;dB .E v} - ."X KE***113d:VCdJEJ KB *113d:VCB r+cntjjtjj|dd|j dk7r*t dj |j dtfd|jDrt dddy ) zr Helper for ``build_index()``: Perform some checks to make sure that the given package is consistent. rrrz&package identifier mismatch ({} vs {})c3VK|] }|k7xr|jdz "ywr Nrrruids r)rz!_check_package..n s- VDCK :c :: : V&)Zipfile ..zip does not expand to a single subdirectory r N) rrrrrPrrrnamelist)r? zipfilenamer.rKs @r)_check_packagerQa s ''  277==5a8 9! ! r+c#Ktj|D]d\}}}|D]Y}|jdstjj ||}t j |j[fyw)z Helper for ``build_index()``: Yield a list of ElementTree.Element objects, each holding the xml for a single package collection. .xmlN)rrLrrr rr0rX)r&dirname_subdirsrHr!xmlfiles r)r:r: sq %'GGDM; 5 ;H  ('',,w9!''088:: ;;s 3BA Bc #Kddlm}g}tj|D]\}}}dj |||}|D]}|j drtj j ||}|dddz} tj| } tj|j} tj j|ddd | jd k7r!td | jd d dt!fd| j#Drtddd| | |f|j ds!tj j%|d} tj j || dz}tj j'|rt)j*|d| dzd| dd |j-dy#t$r} td| d | | d} ~ wwxYw#t$r} td|d | | d} ~ wwxYw#t$rY7wxYww)a Helper for ``build_index()``: Yield a list of tuples ``(pkg_xml, zf, subdir)``, where: - ``pkg_xml`` is an ``ElementTree.Element`` holding the xml for a package - ``zf`` is a ``zipfile.ZipFile`` for the package's contents. - ``subdir`` is the subdirectory (relative to ``root``) where the package was found (e.g. 'corpora' or 'grammars'). r) _path_fromr r[Nr*rzError reading file z! rrzpackage identifier mismatch (z vs rc3VK|] }|k7xr|jdz "ywrHrIrJs r)rz!_find_packages.. s5S[Cs)C%CCrLrMrNz exists, but z' cannot be found! This could mean that z can not be downloaded.r) stacklevelz.svn)nltk.corpus.reader.utilr`rrLr rrr*r+rrrr0rXrrPrrOrr warningswarnr )r&r`rKr\subdirsrHrelpathr! xmlfilenamerPr.rr? resourcenamerKs @r)r8r8 s[3H#%774=3%((:dG45+ H  ( ggll7H= )#2.7 Y 5BY)// <DDFG ggmmK$45a8;;t$+$$+KK$5s< " %58#? r7**""6*!ww//9!<  ggll7L64IJ ww~~k2MM#*M,2G1HI00<~=TV#$O+ \  NN6 "c3!Y$':;/QC%PQWXXY!Y$':;/QC%PQWXXYL   szA=IH#H):B(I$A$I &I1I I H&H!!H&&I) I 2II  I IIIIc>ttjyr6)r~ _downloaderrr-r+r)download_shellrl sK $$&r+c>ttjyr6)rrkrr-r+r) download_guirn s+'')r+c,tjyr6)rkr%r-r+r)r%r% sr+__main__) OptionParserz-dz--dirdirz!download package to directory DIRDIR)destrnmetavarz-qz--quietr1 store_trueFz work quietly)rtactiondefaultrnz-fz--forcerz"download even if already installedz-ez--exit-on-errorr2zexit if an error occursz-uz--urlrNLTK_DOWNLOAD_URLzdownload server index url)rtrxrn)r)rrr1rr2)rr1rr2rr,)MrEr-rrshutilrTrqrrrSrdr*hashlibrr urllib.errorrrurllib.requestr xml.etreerrUr rHrUrWr_rbrjrnrqrurxr{r~rrrrr~rrJrr"r!r#rrCr=rQrYr:r8rkr0rlrnr%rBoptparserqparser add_optionrnrP parse_argsrrr downloaderpkg_idrrr1rr2rvr-r+r)rs0<~Yt   ,"! Z(Z(z&+&+\!! %.%%/%+,,-)*'$#$#!'!)0)k (k (\bbJn:n:n "#%%4DN D  *& ;AVl   '* z% ^F    0          1     &     23 ( '')OWdW-E-EFJ  F$$!$[[mmmm%33 %BU{w44   ----!//  wr+