L iwddlZddlZddlZddlZddlZddlmZddlmZddlZddl Z ddl m Z m Z m Z mZddlmZddlmZddlmZddlmZmZmZmZmZd d lmZej:d Zej:d Zej@d Z!dZ"Gdde Z#GddeZ$ej:dZ%dZ&GddeZ'Gdde Z(ddZ)ddZ*ddZ+ee+Z,y)N)copy)urlparse)AbstractAsyncStreamedFileAsyncFileSystemsync sync_wrapper)DEFAULT_CALLBACK)FSTimeoutError)AbstractBufferedFile)DEFAULT_BLOCK_SIZEglob_translate isfilelike nullcontexttokenize)AllBytesz6<(a|A)\s+(?:[^>]*?\s+)?(href|HREF)=["'](?P[^"']+)z-(?Phttp[s]?://[-a-zA-Z0-9@:%_+.~#?&/=]+)z fsspec.httpc4Ktjdi|Sw)N)aiohttp ClientSession)kwargss a/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/fsspec/implementations/http.py get_clientr s  *6 **sc &eZdZdZdZdZdddddddddedf fd Zed Z d Z e d Z d Z ed ZefdZd!dZd!dZeeZdZd"dZdefdZdeddfdZdZdZ d#dZd$dZdZdZd%dZ dZ!d&d Z"xZ#S)'HTTPFileSystema2 Simple File-System for fetching data via HTTP(S) ``ls()`` is implemented by loading the parent page and doing a regex match on the result. If simple_link=True, anything of the form "http(s)://server.com/stuff?thing=other"; otherwise only links within HTML href tags will be used. )httphttps/TNbytesFc  t||f||d| ||nt|_||_||_||_||_| xsi|_| |_ | |_ | |_ d|_ t| } | jdd|_| jdd| jdd| jdd| |_ y)a NB: if this is called async, you must await set_client Parameters ---------- block_size: int Blocks to read bytes; if 0, will default to raw requests file-like objects instead of HTTPFile instances simple_links: bool If True, will consider both HTML tags and anything that looks like a URL; if False, will consider only the former. same_scheme: True When doing ls/glob, if this is True, only consider paths that have http/https matching the input URLs. size_policy: this argument is deprecated client_kwargs: dict Passed to aiohttp.ClientSession, see https://docs.aiohttp.org/en/stable/client_reference.html For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}`` get_client: Callable[..., aiohttp.ClientSession] A callable, which takes keyword arguments and constructs an aiohttp.ClientSession. Its state will be managed by the HTTPFileSystem class. storage_options: key-value Any other parameters passed on to requests cache_type, cache_options: defaults used in open() ) asynchronousloopNuse_listings_cacheFlistings_expiry_time max_pathsskip_instance_cache)super__init__r block_size simple_links same_schema cache_type cache_options client_kwargsrencodedr_sessionrpopr#)selfr*r) same_scheme size_policyr,r-r!r"r.rr/storage_optionsrequest_options __class__s rr(zHTTPFileSystem.__init__1sT WLtWW(2(>*DV(&$**0b$ %  /"1"5"56JE"R2D9K.148% cy)Nrrr2s rfsidzHTTPFileSystem.fsidrsr8cDtj||jS)N)r/)yarlURLr/r2urls r encode_urlzHTTPFileSystem.encode_urlvsxxT\\22r8c|*|jr t||jdyt |dd}||jyy#ttt f$rY7wxYw)Ng?)timeout _connector) is_runningrclose TimeoutErrorr NotImplementedErrorgetattr_close)r"session connectors r close_sessionzHTTPFileSystem.close_sessionysl   1 T7==#6G\48      !!.2EF  sAA%$A%c,K|jw|jdd|ji|jd{|_|js6t j ||j|j|j|jS7Ww)Nr"r)r0rr"r.r!weakreffinalizerMr:s r set_sessionzHTTPFileSystem.set_sessionsr == "1$//"Wtyy"WDDVDV"WWDM$$  t'9'9499dmmT}}Xs8BBABc|S)z-For HTTP, we always want to keep the full URLr)clspaths r_strip_protocolzHTTPFileSystem._strip_protocols  r8cDt||}t|dkDr|Sy)N)r'_parentlen)rSrTparr7s rrYzHTTPFileSystem._parents&god# s8a<Jr8c K|jj}|j|tj ||j d{}|j |j|fi|j4d{}|j||d|jvr"|jdjdd}nd}|dvr |jdd{}|jr=tj|tj|D cgc]} | d c} z} n%tj|D cgc]} | d } } ng} dddd{t#} t%|}  D]B} t'| t(r| d} | j+d r+t-| dkDr| j.d | j0| } | j+d r|j2r5| j+|j5d d zr| j7| | j9d d j+|j9d d j5d d zs| j7| | d vs| j7d j;|j5d | j=d gE| s;|j?d r*|jA|j5d dd{} |r'| D cgc]} | d| j?d rdnddc} StC| S777Zcc} wcc} w#t $rg} YwxYw7#1d{7swY xYw7vcc} ww)N Content-Type;r)z text/htmlNignore)errorsrrz://rr)z..z../F)detail directoryfilenamesizetype)"rrupdateloggerdebugrQgetrA_raise_not_found_for_statusheaders partitiontextr*ex2findallexUnicodeDecodeErrorsetr isinstancetuple startswithrZschemenetlocr+rstripaddreplacejoinlstripendswith_ls_realsorted)r2r@rbrkwrKrmimetyperpulinksoutpartsls rrzHTTPFileSystem._ls_reals  [[    & S((**7;;ts3Ct{{C  q  , ,Q 4*99^4>>sCAF..!"x!88D(( # D 12::dCS4TaQqT4T T/1zz$/? @!1 @ @%  (e  HA!U#aD||C SVaZ||nC ~aS9||F### SZZ_s5J(KGGAJYYw/::KK077.gen_chunks%s%+%e, ud+ /A%%affQl3FF1I%%ga&>?z*K,,SZ8FF:.E / / /s9C)BC C)C&"C))rputz.method has to be either 'post' or 'put', not: data) rHrrrirQlowerrrIrArm) r2rrrrmethodmoderrrrKmethresps ` `` r _put_filezHTTPFileSystem._put_files ; %&78 8 /.[[    &((**  (@ K w'.HZ\HRH : :D  , ,T5 9 : : :+ : : : : :s[AC4CAC41C2C45C C4CC4C4C4C1%C( &C1-C4cK|jj}|j| tj ||j d{}|j |j|fi|d{}|4d{|jdkcdddd{S7Y717(7 #1d{7swYyxYw#tj$rYywxYww)NiF) rrrirjrkrQrlrArr ClientError)r2rTrrrKrs r_existszHTTPFileSystem._existsJs [[    &  LL  ,,..G!gkk$//$"7>2>>A & &xx#~ & & &/> & & & & &""  s,C'(CB1)CB3 C B5 CB9 C+B7,C0C'1C3C5C7C9C ?C C C C' CC$!C'#C$$C'cDK|j|fi|d{S7wN)r)r2rTrs r_isfilezHTTPFileSystem._isfileVs"!T\\$1&1111s   c "|dk7rt||n |j}|jj} |j| d<| j |i} |xs)| j |j |fi|xs| d}t|j|j} |rO|rM| jddr;t||f| ||||xs |j|xs |j|jd| St||f||j| d| S)aMake a file-like object Parameters ---------- path: str Full URL with protocol mode: string must be "rb" block_size: int or None Bytes to download in one request; use instance value if None. If zero, will return a streaming Requests file-like instance. kwargs: key-value Any other parameters, passed to requests calls rr!rgpartialT)rKr)rrgr,r-r")rr"rK)rHr)rrr!riinforr"rQrlHTTPFiler,r-HTTPStreamFile) r2rTrr) autocommitr,r-rgrrrrKs r_openzHTTPFileSystem._openYs,2 4<% %#-#9Zt [[   !..> &Mt{{9499T#9C>CCc K||dkr tdddl}|jd}|j|}|xs|jd}|j ddk\r|j dn t |}|j ddk\r|j dn t |}t ||} |jd d } t|sC|j|fi|d{r"| s|gS||j|fi|d{iS| sgSiSd|d| vr7|d| jd} |d| dz} || dzdjddz} nd } || dzdjddz} d |vr5|1|j d } || djddz}| |z |z} nd} |j| f| d d d|d{}t||rdnd z}|j|}t!|j#Dcic]T\}}|j%|j'dr/|s*|ddk(r"|jdr|j'dn||V}}}| r|St)|S77l7cc}}ww)a Find files by glob-matching. This implementation is idntical to the one in AbstractFileSystem, but "?" is not considered as a character for globbing, because it is so common in URLs, often identifying the "query" part. Nrazmaxdepth must be at least 1rr)z/**z/**[rbFrXz**T)maxdepthwithdirsrbrhrc)rrerrUfindrZminr1 has_magicrrrindexcount_findr compileritemsmatchr{list)r2rTrrrends_with_slashappend_slash_to_dirnameidx_star idx_bracemin_idxrbrootdepthidx_double_starsdepth_double_starsallpathspatternprrs r_globzHTTPFileSystem._globs  HqL:; ;--,##D)"1"QT]]=5Q%)YYs^q%8499S>c$i&*iin&9DIIcNs4y h *He,!T\\$1&111 6M  4(B6(B"BCCII D'N "8Gn++C0G 'A+&D1'--c2Q6ED1'--c2Q6E 4<##'99T? %)*:*;%<%B%B3%G!%K" 22X=#   4 @F  !R!HI"**W%"(.."23  4}}QXXc]+ /LK/JJsO       J9 a2#C,   sKC-J/I90 JI<B8J I? AJAJ'J<J?JJc~K t|j|d{S7#ttf$rYywxYww)NF)boolrrr)r2rTs r_isdirzHTTPFileSystem._isdirs: dhhtn,- -,!:.  s(=(&(=(:=:=cdK|j|}|jdi}tt||d<|j d{}|j |f||d|4d{}|j dddd{y7D7'7 #1d{7swYyxYww)a Write bytes to a remote file over HTTP. Parameters ---------- path : str Target URL where the data should be written value : bytes Data to be written mode : str How to write to the file - 'overwrite' or 'append' **kwargs : dict Additional parameters to pass to the HTTP request rnContent-LengthN)rrn)rUr1strrZrQrr) r2rTvaluerrr@rnrKrs r _pipe_filezHTTPFileSystem._pipe_files""4(**Y+$'E O !((**7;;sJJ6J ! !a    ! ! !+ ! ! ! ! !sZAB0BB0/B0B03B B0BB0B0B0B-!B$ "B-)B0)T)NN)rNNNNN)rNr)r)$__name__ __module__ __qualname____doc__rseprr(propertyr;rA staticmethodrMrQ classmethodrUrYrrrlsrmrr rrrrrrrrrrr __classcell__r7s@rrr$s !H C ?&B3  :x c B$&(1;K$D! 0:d 2  7r 9 CDGR!r8rcreZdZdZ dfd Zdfd ZdZeeZdZ dZ ee Z xZ S) ra A file-like object pointing to a remote HTTP(S) resource Supports only reading, with read-ahead of a predetermined block-size. In the case that the server does not supply the filesize, only reading of the complete file in one go is supported. Parameters ---------- url: str Full URL of the remote resource, including the protocol session: aiohttp.ClientSession or None All calls will be made within this session, to avoid restarting connections where the server allows this block_size: int or None The amount of read-ahead to do, in bytes. Default is 5MB, or the value configured for the FileSystem creating this file size: None or int If given, this is the size of the file in bytes, and we don't attempt to call the server to find the value. kwargs: all other key-values are passed to requests calls. c |dk7r td| |_| |_||_||_||dd|_t |d||||||d| y)NrzFile mode not supportedrdre)fsrTrr)r,r-r)rHr!r"r@rKdetailsr'r() r2r r@rKr)rr,r-rgr"r!rr7s rr(zHTTPFile.__init__Jsq 4<%&?@ @(  #T6B   !!'   r8c>|dkrD|jdk(r5|j|j|jks|j|j|dkr4|jn#t |j|jz |}t ||S)a5Read bytes from file Parameters ---------- length: int Read up to this many bytes. If negative, read all content to end of file. If the server has not supplied the filesize, attempting to read only part of the data will raise a ValueError. r)locrg blocksize _fetch_allrr'r)r2lengthr7s rrz HTTPFile.readis|aZDHHMYY*tyyDNN/J OO  99 z!TXX-v6Fw|F##r8c0Ktjd|t|jts|j j |jj|jfi|jd{}|4d{|j|jd{}t t|dd||_t||_dddd{yy7v7m7G7 #1d{7swYyxYww)zRead whole file in one shot, without caching This is only called when position is still at zero, and read() is called without a byte-count. zFetch all for N)rgfetcherrr)rjrkrvcacherrKrlr rAr@rrrrZrg)r2rrs rasync_fetch_allzHTTPFile.async_fetch_alls  ~dV,-$**h/&dll&&tww'9'9$(('CSt{{SSA % %""$FFHn%S44c  H  % % %0S %$ % % % %slBDC9 D C;D$D5C=61D' D2C?3D;D=D?DDD DDc|jdd}tjd|}|sy|ddk(rdx}}n.|djdDcgc] }t |c}\}}|d dk(rdn t |d }|||fScc}w) zParse the Content-Range header Content-RangerXzbytes (\d+-\d+|\*)/(\d+|\*))NNNrarN-r)rlrrsplitr)r2rnsmrrxtotals r_parse_content_rangezHTTPFile._parse_content_ranges KK , HH3Q 7# Q43; EC*+A$**S/:Q#a&:JE3! QqTc5  ;sBcKtjd|d|d||jj}|j dij}d|d|dz |d<tj|j d|d|j j|jj|j fd|i|d {}|4d {|jd k(r d d d d {y |j|jd k(xsQ|j|jd |k(xs.t|jjd|dz||z k}|r|jd {}n{|d kDr t!dd }g} |j"jdd {} | r(|j%| |t'| z }|||z kDrnnOd j)|d ||z }|cd d d d {S7L7D7'77e7#1d {7swYy xYww)a3Download a block of data The expectation is that the server returns only the requested bytes, with HTTP code 206. If this is not the case, we first check the headers, and then stream the output - if the data size is bigger than we requested, an exception is raised. zFetch range for z: rrnbytes=rarz : Nir8rrzThe HTTP server doesn't appear to support range requests. Only reading this file from the beginning is supported. Open with block_size=0 for a streaming file interface.i)rjrkrrr1r@rKrlr rArrr rnrrrrappendrZr~) r2rrrrnrresponse_is_rangerclrs rasync_fetch_rangezHTTPFile.async_fetch_rangesM  'vRwau=>!!#**Y+002#E7!C!G95 zWW%5$678"$,,"" GG  txx ( 29 =C  ) ) xx3) ) )     CP,,QYY7:eCPqyy}}%5sQw?@C%KO  !FFHn M"#)).."77E 5)c%j(e +!hhsmMcEk2S) ) )  ) ) $%8?) ) ) ) sCIH! I H$!I$H05 IH'IBH0 H*8H0H,AH0 IH.I$I'I*H0,H0.I0I6H9 7I>I)NNrrNNNF) rrrrr(rrrrr r' _fetch_ranger r s@rrr1sU8    >$.%"o.J !9v 12Lr8rz([*[])c4tj|}|duSr) magic_checksearch)rrs rrrs   q !E  r8cPeZdZdfd ZddZddZeeZdZfdZ xZ S) rc jdd__|_|_|dk7rt dd_t d||ddfd}tj|_ |j_y) Nr!Frrnoner rTrr,cKjjjjfij d{}jj ||S7"wr)rKrlr rA __aenter__rm)rrr2r@s rcorz$HTTPStreamFile.__init__..cors\&dll&&tww'9'9#'>I&ITTVVA GG / /3 7HWsAA0 A. #A0r) r1r!r@r"rKrrr'r(rr) r2r r@rr"rKrr4r7s ` ` ` rr(zHTTPStreamFile.__init__s"JJ~u=  4<  #T2  QBStQ&Q  dii%GG r8cX|dk(r|dk(ry||jk(r|dk(rytd)NrrazCannot seek streaming HTTP file)rr)r2rwhences rrzHTTPStreamFile.seeks2 !8!  $((?v{ :;;r8cK|jjj|d{}|xjt |z c_|S7$wr)rrrrrZ)r2numrs r_readzHTTPStreamFile._reads=FFNN'',, CH -s)AA%Ac@K|jjywr)rrFr:s rrJzHTTPStreamFile._closes  sc~tj|j|jt|yr)asynciorun_coroutine_threadsaferJr"r'rFr2r7s rrFzHTTPStreamFile.close s&(( B  r8)rNN)rr() rrrr(rr9rrrJrFr r s@rrrs-$<  Dr8rc4eZdZ dfd ZddZfdZxZS)rc ||_||_d|_|dk7rt|dd|_||_t ||||d||_y)Nrrr0r1) r@rKrrrrr'r(rg) r2r r@rr"rKrgrr7s rr(zAsyncStreamFile.__init__sW  4<  #T2   BStG r8cK|j|jj|jj |j fi|j jd{}|jj||j ||_|jjj|d{}|xjt|z c_ |S7~7&wr) rrKrlr rAr@rr3rmrrrrZ)r2r8rrs rrzAsyncStreamFile.reads 66>&dll&&""488,04 jlA GG / /488 <DFFFNN'',, CH  -s%A(C-*C)+AC-C+%C-+C-cK|j!|jjd|_t|d{y7wr)rrFr'r>s rrFzAsyncStreamFile.close)s4 66  FFLLNDFgmos?A AA )rNNNr()rrrr(rrFr r s@rrrs@D  r8rcK|j}|jdij}d|d|dz |d<|j|fd|i|d{}|j|4d{|j d{}dddd{|r8t |d5} | j || jdddyS77j7T7F#1d{7swYVxYw#1swYyxYww)Nrnr"rrarzr+b)rr1rlrrrrr) rKr@rrrdrrnrrrs r get_rangerD0s [[]FjjB',,.Gwaay1GGgkk#9w9&99AFFHn $  ! FF5M GGCL    :  sADCD4C5D8C% C! C% DC#D/#C: DD!C%#D%C7+C. ,C73D:D?DcbKtjd||j}|jdd}|j dij}d|d<||d<i}|dk(r|j |fd|i|d{}n1|d k(r|j|fd|i|d{}nt d ||4d{|jd |jvr;d |jvs|jd d vrWt|jd |d<n;d|jvr-t|jdjdd|d<d|jvr$|jdjdd|d<|jj ddk(rd|d<t|j|d<dD]0}|jj |s|j|||<2dddd{|S77j7S7#1d{7swY|SxYww)zCall HEAD on the server to get details about the file (size/checksum etc.) Default operation is to explicitly allow redirects and use encoding 'identity' (no compression) to get the true size of the target. zRetrieve file size for %sallow_redirectsTrnidentityzAccept-EncodingrNrlz)size_policy must be "head" or "get", got rzContent-Encoding)rGrXrgrrrar]r^rrz Accept-Rangesr0Frr@)ETagz Content-MD5Digestz Last-Modified)rjrkrr1rlr TypeErrorrrnrrrorr@) r@rKr4rarrrrchecksum_fields rrrAs6  LL,c2 [[]F %t ,B ::i $ ) ) +D(D F9 Df',,sABA&A A  '++c@2@@ @CK=QRRAA  qyy ("2aii@R6SX7 #199-=#>?V  )qyy9??DQGHDL QYY & yy8BB3GJD  99== )V 3$DO!%%jU P ANyy}}^,'(yy'@^$ A1AA8 KC B @AAAAA8 KssBH/H"H/'H(H/HH/D"H(H< H/HH/H/H/H/H,H" H,'H/cK|td{}t|g|d|i|d{}|jdS7.7w)NrKrg)rrrl)r@rKargsrrs r _file_sizerOtsI" $CB4BB6B BD 88F % BsAAAAAAr)r)-r<rloggingrrOr urllib.parserrr= fsspec.asynrrrrfsspec.callbacksr fsspec.exceptionsr fsspec.specr fsspec.utilsr r rrrcachingrrrsrq getLoggerrjrrrr,rrrrDrrO file_sizerr8rrZs  ! VV-,,RZZMNbjjEF   = )+J!_J!Zk3#k3\bjj"  &)&R/@"0f  $ r8