L i(dZddlZddlmZddlZddlZddlZddl Z ddl m Z  ddlZesJdZddlmZddlmZdd lmZmZgd ZGd d ZGd deZedCideddeddeddeddeddeddeddedded d!ed"d#ed$d%ed&d'ed(d)ed*d+ed,d-ed.d/ed0d1ed2d3ed4d5ed6Zej;edddddddd7dd8ddddddddddddd9d:dddddddddddddddd;'d<Zed=j>dCiee_ dDdddddddd9d:dddddddddddddddd8dddd>d?Z ed@j>dCiee _ dDddddddddddddddA dBZ!y#e$rdZYwxYw)Ez9Plotting functions for linear models (broadly construed).N)dedentTF)utils) algorithms) FacetGrid _facet_docs)lmplotregplot residplotc"eZdZdZdZdZdZy)_LinearPlotterzBase class for plotting relational data in tidy format. To get anything useful done you'll have to inherit from this, but setup code that can be abstracted out should be put here. c ||_t|jDcgc]}t|tc}}|r | t d|j D]\}}t|tr||}n(t|trtj|}n|}|$|jdk7rtj|}tj|dkDr d}t |t|||ycc}w)z,Extract variables from data or use directly.Nz*Must pass `data` if using named variables.)rrzregplot inputs must be 1d)dataanyvalues isinstancestr ValueErroritemslistnpasarrayshapesqueezendimsetattr) selfrkwsv any_stringsvarvalvectorerrs X/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/seaborn/regression.pyestablish_variablesz"_LinearPlotter.establish_variables s szz|D!:a-DE 4<IJ J 'HC#s#cC&C!flld&:F+wwv"1 o% D#v & ' EsC;c Z|Dcgc]}t||}}|Dcgc]}|| }}tjtj|Dcgc]}t j |c}d}|D]!}t||}|t ||||#ycc}wcc}wcc}w)z&Remove observations with missing data.Nraxis)getattrrall column_stackpdnotnullr)rvarsr!valsrnot_nar"s r%dropnaz_LinearPlotter.dropna8s.23sc"331a1=11(E1A(EFQO 0C$$Cc3v;/ 041(EsBB#B#B( ctN)NotImplementedError)raxs r%plotz_LinearPlotter.plotBs!!N)__name__ __module__ __qualname____doc__r&r2r7r8r%r r s '00"r8r ceZdZdZ ddZedZedZdZddZ dZ d Z d Z d Z d Zd ZdZdZdZdZy)_RegressionPlotterzPlotter for numeric independent variables with regression model. This does the computations and drawing for the `regplot` function, and is thus also used indirectly by `lmplot`. Nc||_| |_|dk(r| n||_| |_| |_||_||_| |_||_||_ ||_ ||_ ||_ ||_ ||_||_||_t#| dkD||||fdkDr t%d|j'|||| |||r|j)ddddd |j*+|j-|j.|j*|_|j0+|j-|j2|j0|_|5|t4j6n||_|j9|\}}||_n|j.|_t=|j.dkrd |_|j r:|j.j?|j.jAf|_!yy) Ncirz&Mutually exclusive regression options.)xyunits x_partial y_partialrBrCrDrErFF)" x_estimatorrAx_cin_bootseedscatterfit_regorderlogisticlowessrobustlogxtruncatex_jittery_jittercolorlabelsumrr&r2rE regress_outrBrFrCrmean bin_predictor x_discretelenminmaxx_range)rrBrCrrGx_binsrHrKrLrArIrDrJrMrNrOrPrQrErFrRr2rSrTrUrVr[s r%__init__z_RegressionPlotter.__init__Ls'$,BD                     8VVT: ;a ?EF F   au+4  ! K  KKS'; D >> %%%dffdnn=DF >> %%%dffdnn=DF  *5*=rww;D !%!3!3F!; J(DO"ffDO tvv;!  DL <<66::<5DL r8c|j}| |j}nB|jtjj | |t |jz}|j }||j}||fS|jtjj | |t |jz}||fS)z'Data where each observation is a point.)rSrBrrandomuniformr\rTrC)rx_jrBy_jrCs r% scatter_dataz_RegressionPlotter.scatter_datasmm ;A**C4c$&&kBBAmm ;A!t **C4c$&&kBBA!t r8c|j|j}}ttj|}gg}}|D]}|||k(}|j |}|j ||j|j dLd} |jdk(r tj|} || z || zf} nv|j|j||k(} tj||j |j| |j} tj| |j} |j | |||fS)z#}}T4::> D* ]] G <#33D#;C: 4 G D* [[B*JD$ [[ B#33D#> D* YY#}}T2 D*#}}T2 D* :IRa8IT9$$r8c d}tjtjt|j|jf|j }}tjtjt||f}|j |||}|j|dfStj||||j|j|jj}|j |j}||fS)z9Low-level regression and prediction using linear algebra.c^tjj|j|Sr4)rlinalgpinvdot_xrts r%reg_funcz-_RegressionPlotter.fit_fast..reg_funcs 99>>"%))"- -r8Nrj)rc_onesr\rBrCrrArprqrIrDrJT)rrrXrCr beta_bootsrs r%rz_RegressionPlotter.fit_fasts .uuRWWS[)46612DFF1uuRWWSY'-.xxA' 77?: ^^Aq)1+/;;*.**)- 456A  XXj)++ Zr8cfd}|j|j}}|||}|j|dfStj||||j |j |j}||fS)z7Regression using numpy polyfit for higher-order trends.cZtjtj||Sr4)rpolyvalpolyfit)rrtrrMs r%rz-_RegressionPlotter.fit_poly..reg_funcs!::bjjR7> >r8Nrj)rBrCrArprqrIrDrJ)rrrMrrBrCrrs `` r%rz_RegressionPlotter.fit_polysp ?vvtvv11~ 77?: ^^Aq)1+/;;*.**)- 4 Zr8c  ddlmcm tjtj t |j|jf|j}}tjtj t f fd}|||}|j|dfStj||||j|j|j}||fS)z;More general regression function using statsmodels objects.rNcjf} tj5tdr1tjdj g|j }||fij j}ddd|S#1swYSxYw#|$rAtjt}|jtjY|SwxYw)NPerfectSeparationWarningerror) PerfectSeparationErrorwarningscatch_warningshasattr simplefilterrfitpredictremptyr\fillnan)rrt err_classesrrkwargsmodelsmes r%rz4_RegressionPlotter.fit_statsmodels..reg_funcs557K ",,.Gs$>? --gs7S7ST&R &RS5Q5Q&R  R262668@@FD GKGK "xxD * "&&!K "s0B A%B B BB B AC&%C&rj)statsmodels.tools.sm_exceptionstools sm_exceptionsrrrr\rBrCrArprqrIrDrJ) rrrrrrCrrrrs ``` @r%rz"_RegressionPlotter.fit_statsmodelss55uuRWWS[)46612DFF1uuRWWSY'-. 1~ 77?: ^^Aq)1+/;;*.**)- 4 Zr8cjddlm}||j|jj\}}||fS)z>Fit a locally-weighted regression, which returns its own grid.r)rO)*statsmodels.nonparametric.smoothers_lowessrOrCrBr)rrOrrs r%rz_RegressionPlotter.fit_lowess2s-EDFFDFF+-- dTzr8cFtjtjt|j|jf|j }}tjtjt|tj |f}d}|j|||}|j|dfStj||||j|j|jj}|j|j}||fS)zFit the model in log-space.ctj|dddftj|dddff}tjj |j |S)Nrr)rrlogrrrrs r%rz-_RegressionPlotter.fit_logx..reg_func=sLr!Q$x1a4!112B99>>"%))"- -r8Nrj)rrrr\rBrCrrrArprqrIrDrJr)rrrrCrrrrs r%rz_RegressionPlotter.fit_logx8suuRWWS[)46612DFF1uuRWWSY'56 .xxA' 77?: ^^Aq)1+/;;*.**)- 456A  XXj)++ Zr8ctj|j}tj|r4tjdd|dzdd}tj ||}ntj |}tjtjj||}|tj|dj }||fS)z9Discretize a predictor by assigning value to closest bin.rrrr() rrrBisscalarr percentileravelabssubtractouterargmin)rbinsrB percentilesdistx_binneds r%rZz _RegressionPlotter.bin_predictorMs JJtvv  ;;t ++adQh7"=K==K0D88D>Dvvbkk''401 $Q/0668~r8cZ|j}||z }||jz }tj|}||jtjj |j|z }tj ||zj|jS)z+Regress b from a keeping a's original mean.) rYrrrrrrreshaper)raba_meana_primes r%rXz_RegressionPlotter.regress_out[s J L EE!HaeeBIINN1-11!455zz'F*+33AGG<  * ="'H &DOr8r? model_apiz There are a number of mutually exclusive options for estimating the regression model. See the :ref:`tutorial ` for more information. regplot_vs_lmplotz The :func:`regplot` and :func:`lmplot` functions are closely related, but the former is an axes-level function while the latter is a figure-level function that combines :func:`regplot` and :class:`FacetGrid`. rGaJ x_estimator : callable that maps vector -> scalar, optional Apply this function to each unique value of ``x`` and plot the resulting estimate. This is useful when ``x`` is a discrete variable. If ``x_ci`` is given, this estimate will be bootstrapped and a confidence interval will be drawn. r`a x_bins : int or vector, optional Bin the ``x`` variable into discrete bins and then estimate the central tendency and a confidence interval. This binning only influences how the scatterplot is drawn; the regression is still fit to the original data. This parameter is interpreted either as the number of evenly-sized (not necessary spaced) bins or the positions of the bin centers. When this parameter is used, it implies that the default of ``x_estimator`` is ``numpy.mean``. rHaZ x_ci : "ci", "sd", int in [0, 100] or None, optional Size of the confidence interval used when plotting a central tendency for discrete values of ``x``. If ``"ci"``, defer to the value of the ``ci`` parameter. If ``"sd"``, skip bootstrapping and show the standard deviation of the observations in each bin. rKz scatter : bool, optional If ``True``, draw a scatterplot with the underlying observations (or the ``x_estimator`` values). rLz fit_reg : bool, optional If ``True``, estimate and plot a regression model relating the ``x`` and ``y`` variables. rAaw ci : int in [0, 100] or None, optional Size of the confidence interval for the regression estimate. This will be drawn using translucent bands around the regression line. The confidence interval is estimated using a bootstrap; for large datasets, it may be advisable to avoid that computation by setting this parameter to None. rIz n_boot : int, optional Number of bootstrap resamples used to estimate the ``ci``. The default value attempts to balance time and stability; you may want to increase this value for "final" versions of plots. rDa units : variable name in ``data``, optional If the ``x`` and ``y`` observations are nested within sampling units, those can be specified here. This will be taken into account when computing the confidence intervals by performing a multilevel bootstrap that resamples both units and observations (within unit). This does not otherwise influence how the regression is estimated or drawn. rJz seed : int, numpy.random.Generator, or numpy.random.RandomState, optional Seed or random number generator for reproducible bootstrapping. rMz order : int, optional If ``order`` is greater than 1, use ``numpy.polyfit`` to estimate a polynomial regression. rNaw logistic : bool, optional If ``True``, assume that ``y`` is a binary variable and use ``statsmodels`` to estimate a logistic regression model. Note that this is substantially more computationally intensive than linear regression, so you may wish to decrease the number of bootstrap resamples (``n_boot``) or set ``ci`` to None. rOz lowess : bool, optional If ``True``, use ``statsmodels`` to estimate a nonparametric lowess model (locally weighted linear regression). Note that confidence intervals cannot currently be drawn for this kind of model. rPai robust : bool, optional If ``True``, use ``statsmodels`` to estimate a robust regression. This will de-weight outliers. Note that this is substantially more computationally intensive than standard linear regression, so you may wish to decrease the number of bootstrap resamples (``n_boot``) or set ``ci`` to None. rQz logx : bool, optional If ``True``, estimate a linear regression of the form y ~ log(x), but plot the scatterplot and regression model in the input space. Note that ``x`` must be positive for this to work. xy_partialz {x,y}_partial : strings in ``data`` or matrices Confounding variables to regress out of the ``x`` or ``y`` variables before plotting. rRz truncate : bool, optional If ``True``, the regression line is bounded by the data limits. If ``False``, it extends to the ``x`` axis limits. xy_jitteraT {x,y}_jitter : floats, optional Add uniform random noise of this size to either the ``x`` or ``y`` variables. The noise is added to a copy of the data after fitting the regression, and only influences the look of the scatterplot. This can be helpful when plotting variables that take discrete values. scatter_line_kwsz {scatter,line}_kws : dictionaries Additional keyword arguments to pass to ``plt.scatter`` and ``plt.plot``. orr)'rBrChuecolrowpalettecol_wrapheightaspectmarkerssharexsharey hue_order col_order row_orderlegend legend_outrGr`rHrKrLrArIrDrJrMrNrOrPrQrErFrRrSrTrr facet_kwsc'*''i''fd}(|(d| |(d| |(d|| td||||||| |!g})tj|)D*cgc]}*|*|* c}*j}+||+}t |f||||||| || |d '},|,j d}-nt |,j }-t| ts| g|-z} t | |-k7r tdd | i|,_ d }.|,j|.|| td!id |d |d|d|d|d|d|d|d|d|d|d|d|d|d| d|!d|"d|#d|$d|%d |&}/|,jtf||d |/|,j|||r||||fvr|,j|,Scc}*w)"NcT|d}| tj|t||<yy)Nzj is deprecated from the `lmplot` function signature. Please update your code to pass it using `facet_kws`.)rwarn UserWarning)keyr"msgrs r%facet_kw_deprecationz$lmplot..facet_kw_deprecationPs<eD D  ? MM#{ + IcN r8rrrz)Missing required keyword argument `data`.) rrrrrrrrrrrzSmarkers must be a singleton or a list of markers for each level of the hue variablerc|||gjjt}|j|d|j dy)NF)updatey)scaley)to_numpyastypefloatupdate_datalimautoscale_view)rrBrCr6rxyss r%r-zlmplot..update_datalim{sGAq6l##%,,U3 #u- 'r8)rBrCrGr`rHrKrLrArIrDrJrMrNrOrPrQrErFrRrSrTrrr=) TypeErrorrrmtolistr hue_namesr\rrrhue_kws map_dataframedictr set_axis_labels add_legend)0rrBrCrrrrrrrrrrrrrrrrGr`rHrKrLrArIrDrJrMrNrOrPrQrErFrRrSrTrrrr& need_colsrcolsfacets n_markersr- regplot_kwss0 ` r%r r @sm !6*6*z2 |CDDAsCeY BI 99? ?(FN(  12(.59!(-/8>FK*2;A ! -6 BK    %-  8@    +3 KF:A:k: 1a 3?S#J)> M_=s FFaa Plot data and regression model fits across a FacetGrid. This function combines :func:`regplot` and :class:`FacetGrid`. It is intended as a convenient interface to fit regression models across conditional subsets of a dataset. When thinking about how to assign variables to different facets, a general rule is that it makes sense to use ``hue`` for the most important comparison, followed by ``col`` and ``row``. However, always think about your particular dataset and the goals of the visualization you are creating. {model_api} The parameters to this function span most of the options in :class:`FacetGrid`, although there may be occasional cases where you will want to use that class and :func:`regplot` directly. Parameters ---------- {data} x, y : strings, optional Input variables; these should be column names in ``data``. hue, col, row : strings Variables that define subsets of the data, which will be drawn on separate facets in the grid. See the ``*_order`` parameters to control the order of levels of this variable. {palette} {col_wrap} {height} {aspect} markers : matplotlib marker code or list of marker codes, optional Markers for the scatterplot. If a list, each marker in the list will be used for each level of the ``hue`` variable. {share_xy} .. deprecated:: 0.12.0 Pass using the `facet_kws` dictionary. {{hue,col,row}}_order : lists, optional Order for the levels of the faceting variables. By default, this will be the order that the levels appear in ``data`` or, if the variables are pandas categoricals, the category order. legend : bool, optional If ``True`` and there is a ``hue`` variable, add a legend. {legend_out} .. deprecated:: 0.12.0 Pass using the `facet_kws` dictionary. {x_estimator} {x_bins} {x_ci} {scatter} {fit_reg} {ci} {n_boot} {units} {seed} {order} {logistic} {lowess} {robust} {logx} {xy_partial} {truncate} {xy_jitter} {scatter_line_kws} facet_kws : dict Dictionary of keyword arguments for :class:`FacetGrid`. See Also -------- regplot : Plot data and a conditional model fit. FacetGrid : Subplot grid for plotting conditional relationships. pairplot : Combine :func:`regplot` and :class:`PairGrid` (when used with ``kind="reg"``). Notes ----- {regplot_vs_lmplot} Examples -------- .. include:: ../docstrings/lmplot.rst )rBrCrGr`rHrKrLrArIrDrJrMrNrOrPrQrErFrRr2rSrTrVrUrrrr6c t|||||||||| | | | | |||||||||||}|tj}|intj|}||d<|intj|}|j ||||S)Nr)r?pltgcacopyr7)rrBrCrGr`rHrKrLrArIrDrJrMrNrOrPrQrErFrRr2rSrTrVrUrrrr6plotters r%r r s!At[&$!('2vud!&&&$!*Ix!)8UE CG  z WWY#+";1GK"K%r499X+>H LL[(+ Ir8a Plot data and a linear regression model fit. {model_api} Parameters ---------- x, y: string, series, or vector array Input variables. If strings, these should correspond with column names in ``data``. When pandas objects are used, axes will be labeled with the series name. {data} {x_estimator} {x_bins} {x_ci} {scatter} {fit_reg} {ci} {n_boot} {units} {seed} {order} {logistic} {lowess} {robust} {logx} {xy_partial} {truncate} {xy_jitter} label : string Label to apply to either the scatterplot or regression line (if ``scatter`` is ``False``) for use in a legend. color : matplotlib color Color to apply to all plot elements; will be superseded by colors passed in ``scatter_kws`` or ``line_kws``. marker : matplotlib marker code Marker to use for the scatterplot glyphs. {scatter_line_kws} ax : matplotlib Axes, optional Axes object to draw the plot onto, otherwise uses the current Axes. Returns ------- ax : matplotlib Axes The Axes object containing the plot. See Also -------- lmplot : Combine :func:`regplot` and :class:`FacetGrid` to plot multiple linear relationships in a dataset. jointplot : Combine :func:`regplot` and :class:`JointGrid` (when used with ``kind="reg"``). pairplot : Combine :func:`regplot` and :class:`PairGrid` (when used with ``kind="reg"``). residplot : Plot the residuals of a linear regression model. Notes ----- {regplot_vs_lmplot} It's also easy to combine :func:`regplot` and :class:`JointGrid` or :class:`PairGrid` through the :func:`jointplot` and :func:`pairplot` functions, although these do not directly accept all of :func:`regplot`'s parameters. Examples -------- .. include:: ../docstrings/regplot.rst ) rBrCrErFrOrMrPr2rVrUrrr6c t|||d|||||| |  }| tj} |j|j\}}}|j |z |_|rd|_nd|_| jddd | in| j} | in| j} |j| | | | S) a Plot the residuals of a linear regression. This function will regress y on x (possibly as a robust or polynomial regression) and then draw a scatterplot of the residuals. You can optionally fit a lowess smoother to the residual plot, which can help in determining if there is structure to the residuals. Parameters ---------- data : DataFrame, optional DataFrame to use if `x` and `y` are column names. x : vector or string Data or column name in `data` for the predictor variable. y : vector or string Data or column name in `data` for the response variable. {x, y}_partial : vectors or string(s) , optional These variables are treated as confounding and are removed from the `x` or `y` variables before plotting. lowess : boolean, optional Fit a lowess smoother to the residual scatterplot. order : int, optional Order of the polynomial to fit when calculating the residuals. robust : boolean, optional Fit a robust linear regression when calculating the residuals. dropna : boolean, optional If True, ignore observations with missing data when fitting and plotting. label : string, optional Label that will be used in any plot legends. color : matplotlib color, optional Color to use for all elements of the plot. {scatter, line}_kws : dictionaries, optional Additional keyword arguments passed to scatter() and plot() for drawing the components of the plot. ax : matplotlib axis, optional Plot into this axis, otherwise grab the current axis or make a new one if not existing. Returns ------- ax: matplotlib axes Axes with the regression plot. See Also -------- regplot : Plot a simple linear regression model. jointplot : Draw a :func:`residplot` with univariate marginal distributions (when used with ``kind="resid"``). Examples -------- .. include:: ../docstrings/residplot.rst N)rArMrPrErFr2rUrV)rTFr:z.2)lsc) r?r>r?rrBrCrOrLaxhliner@r7)rrBrCrErFrOrMrPr2rVrUrrr6rArrs r%r r Vsz!At',V+4 (.e5JG  z WWY''WYY'7JAtQ D GIJJqSDJ!$+"1A1A1CK%r8==?H LL[(+ Ir8r=r4)"r<r@textwraprrnumpyrpandasr- matplotlibrmatplotlib.pyplotpyplotr> statsmodelsrz ImportErrorrrrpaxisgridrr__all__r r?r5_regression_docsupdater r|r r r=r8r%rTs?  ; , -*"*"ZtOtOn    &   ':  ;H  IR  S\ ]l  mx  yH  IP  QZ [j  kv  wF  GR S\ ]f gt u@ $  d$D 4!S  Dd4 tTb TU5 u D4T TQhYYY r sY$r#sY$z DT $2d$ QuU $$ $ d3t6H&HH P QH$P#QH$X VDd5 E$d$t VQs GGG