U
     )3g:Y                     @  s0  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlm Z  d dlm!Z! d dl"m#Z# e	rd dl$m%Z% d dl&Z'd dl(m)Z) d dl(m*Z* d d l+m,Z, d d!l-m.Z. ed"Z/G d#d$ d$eZ0d%d%d%d&d'd(Z1d%d%d%d)d*d+Z2d,d-d.d/d0Z3d,d,d1d2d3Z4d4d5d1d6d7Z5d8d9d:d;d<Z6d,d,d5d=d>d?Z7d@dAdBdCdDZ8d"dEd"dFdGdHZ9d"dIdJdKdLZ:d"dMd"dNdOdPZ;d"d"dJdQdRZ<dSd,d5dTdUdVZ=d"d5dWd"dXdYdZZ>d[d5d\d]d^Z?d_d`d%dadbdcZ@d_d`d%dadddeZAd,dfd5d`dgdhdiZBd,djdkdldmZCd_dndodpZDd%d%dAdqdrdsZEdtdtd5d5d5dudvdwZFdS )x    )annotationsN)Enum)auto)	token_hex)TYPE_CHECKING)Any)Iterable)Sequence)TypeVar)cast)warn)ColumnNotFoundError)get_cudf)get_dask_dataframe)	get_modin)
get_pandas)
get_polars)get_pyarrow)is_cudf_series)is_modin_series)is_pandas_dataframe)is_pandas_like_dataframe)is_pandas_like_series)is_pandas_series)is_polars_series)is_pyarrow_chunked_array)	to_native)
ModuleType)Self)	TypeGuard)	BaseFrameSeriesTc                   @  s\   e Zd Ze Ze Ze Ze Ze Ze Z	e Z
eddd dddZddddd	Zd
S )Implementationz
type[Self]r   )clsnative_namespacereturnc                 C  sB   t  tjt tjt tjt tjt	 tj
t tji}||tjS )zAInstantiate Implementation object from a native namespace module.)r   r$   PANDASr   MODINr   CUDFr   PYARROWr   POLARSr   DASKgetUNKNOWN)r%   r&   mapping r1   2/tmp/pip-unpacked-wheel-hfsjijke/narwhals/utils.pyfrom_native_namespace7   s          z$Implementation.from_native_namespacer   )selfr'   c                 C  s<   t jt t jt t jt t jt t j	t
 t jt i}||  S )zCReturn the native namespace module corresponding to Implementation.)r$   r(   r   r)   r   r*   r   r+   r   r,   r   r-   r   )r4   r0   r1   r1   r2   to_native_namespaceF   s          z"Implementation.to_native_namespaceN)__name__
__module____qualname__r   r(   r)   r*   r+   r,   r-   r/   classmethodr3   r5   r1   r1   r1   r2   r$   -   s   r$   str)textprefixr'   c                 C  s   |  |r| t|d  S | S N)
startswithlen)r;   r<   r1   r1   r2   remove_prefixS   s    
r@   )r;   suffixr'   c                 C  s    |  |r| d t|  S | S r=   )endswithr?   )r;   rA   r1   r1   r2   remove_suffixY   s    
rC   r   z	list[Any])argsr'   c                 C  s,   | sg S t | dkr(t| d r(| d S | S )N   r   )r?   _is_iterable)rD   r1   r1   r2   flatten_   s
    rG   )argr'   c                 C  s   t | ttfs| fS | S r=   )
isinstancelisttuple)rH   r1   r1   r2   tupleifyg   s    rL   zAny | Iterable[Any]boolc                 C  s   ddl m} t| st| r4dt|  d}t|t  }d k	rtt| |j|j|j	|j
frtdt|  d}t|t| tot| tt|f S )Nr   r!   z(Expected Narwhals class or scalar, got: z2. Perhaps you forgot a `nw.from_native` somewhere?z`.

Hint: Perhaps you
- forgot a `nw.from_native` somewhere?
- used `pl.col` instead of `nw.col`?)narwhals.seriesr"   r   r   type	TypeErrorr   rI   ZExpr	DataFrame	LazyFramer   r:   bytes)rH   r"   msgplr1   r1   r2   rF   m   s     rF   zSequence[str | int]ztuple[int, ...])versionr'   c                 C  s&   t | tr| d} tdd | D S )zASimple version parser; split into a tuple of ints for comparison..c                 s  s$   | ]}t td dt|V  qdS )z\D N)intresubr:   ).0vr1   r1   r2   	<genexpr>   s     z parse_version.<locals>.<genexpr>)rI   r:   splitrK   )rV   r1   r1   r2   parse_version   s    

r`   )objr%   r'   c                 C  s4   ddl m} t| |r t| |S t| |p2t| |S )Nr   )DType)Znarwhals.dtypesrb   rI   
issubclass)ra   r%   rb   r1   r1   r2   isinstance_or_issubclass   s    

rd   zIterable[Any]None)itemsr'   c                   sX   ddl m  ddl m t fdd| D sDtfdd| D rHd S d}t|d S )Nr   rQ   rR   c                 3  s   | ]}t | V  qd S r=   rI   r\   itemrg   r1   r2   r^      s     z$validate_laziness.<locals>.<genexpr>c                 3  s   | ]}t | V  qd S r=   ri   rj   rh   r1   r2   r^      s     z@The items to concatenate should either all be eager, or all lazy)narwhals.dataframerQ   rR   allNotImplementedError)rf   rT   r1   )rQ   rR   r2   validate_laziness   s    ro   zSeries | BaseFrame[Any])lhsrhsr'   c                 C  s  ddl m} ddlm} ddddd}tt| }tt|}tt|d	d
|rtt|d	d
|r||jj	j
 ||jj	j
 ||j|jj	j|jj	j
 S tt|d	d
|rtt|dd
|r||jj	j
 ||jjj
 ||j|jj	j|jjj
 S tt|dd
|rjtt|d	d
|rj||jjj
 ||jj	j
 ||j|jjj|jj	j
 S tt|dd
|rtt|dd
|r||jjj
 ||jjj
 ||j|jjj|jjj
 S t|t|krdt| dt| }t|| S )aO  
    Align `lhs` to the Index of `rhs`, if they're both pandas-like.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this only checks that `lhs` and `rhs`
        are the same length.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4])
        >>> s_pd = pd.Series([6, 7], index=[4, 3])
        >>> df = nw.from_native(df_pd)
        >>> s = nw.from_native(s_pd, series_only=True)
        >>> nw.to_native(nw.maybe_align_index(df, s))
           a
        4  2
        3  1
    r   )PandasLikeDataFrame)PandasLikeSeriesr   re   )indexr'   c                 S  s   | j sd}t|d S )Nz'given index doesn't have a unique index)Z	is_unique
ValueError)rt   rT   r1   r1   r2   _validate_index   s    z*maybe_align_index.<locals>._validate_index_compliant_frameN_compliant_seriesz6Expected `lhs` and `rhs` to have the same length, got z and )Znarwhals._pandas_like.dataframerr   Znarwhals._pandas_like.seriesrs   r   r   rI   getattrrw   Z_native_framert   _from_compliant_dataframe_from_native_framelocrx   _native_series_from_compliant_series_from_native_seriesr?   ru   )rp   rq   rr   rs   rv   Zlhs_anyZrhs_anyrT   r1   r1   r2   maybe_align_index   s    


 
 
 
 r   z
Any | None)ra   r'   c                 C  s,   t t| }t|}t|s"t|r(|jS dS )ae  
    Get the index of a DataFrame or a Series, if it's pandas-like.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this returns `None`.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
        >>> df = nw.from_native(df_pd)
        >>> nw.maybe_get_index(df)
        RangeIndex(start=0, stop=2, step=1)
        >>> series_pd = pd.Series([1, 2])
        >>> series = nw.from_native(series_pd, series_only=True)
        >>> nw.maybe_get_index(series)
        RangeIndex(start=0, stop=2, step=1)
    N)r   r   r   r   r   rt   )ra   obj_any
native_objr1   r1   r2   maybe_get_index   s
    
r   zstr | list[str])dfcolumn_namesr'   c                 C  s6   t t| }t|}t|r2||j||S |S )a  
    Set columns `columns` to be the index of `df`, if `df` is pandas-like.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already aligns indices for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this is a no-op.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(nw.maybe_set_index(df, "b"))  # doctest: +NORMALIZE_WHITESPACE
           a
        b
        4  1
        5  2
    )r   r   r   r   rz   rw   r{   Z	set_index)r   r   Zdf_anyZnative_framer1   r1   r2   maybe_set_index  s    
r   c                 C  s   t t| }t|}t|rJ| }t||r0|S ||j|j	ddS t
|r| }t||rh|S ||j|j	ddS |S )a  
    Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like.

    Notes:
        This is only really intended for backwards-compatibility purposes,
        for example if your library already resets the index for users.
        If you're designing a new library, we highly encourage you to not
        rely on the Index.
        For non-pandas-like inputs, this is a no-op.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7]))
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(nw.maybe_reset_index(df))
           a  b
        0  1  4
        1  2  5
        >>> series_pd = pd.Series([1, 2])
        >>> series = nw.from_native(series_pd, series_only=True)
        >>> nw.maybe_get_index(series)
        RangeIndex(start=0, stop=2, step=1)
    T)Zdrop)r   r   r   r   Z__native_namespace___has_default_indexrz   rw   r{   Zreset_indexr   r~   rx   r   )ra   r   r   r&   r1   r1   r2   maybe_reset_index7  s&    



r   zpd.Series | pd.DataFrame)native_frame_or_seriesr&   r'   c                 C  s4   | j }t||jo2|jdko2|jt|ko2|jdkS )Nr   rE   )rt   rI   Z
RangeIndexstartstopr?   step)r   r&   rt   r1   r1   r2   r   f  s    r   z
bool | str)ra   rD   kwargsr'   c                 O  sZ   t t| }t|}t|r4||j|j||S t|rV|	|j
|j||S |S )a  
    Convert columns or series to the best possible dtypes using dtypes supporting ``pd.NA``, if df is pandas-like.

    Arguments:
        obj: DataFrame or Series.
        *args: Additional arguments which gets passed through.
        **kwargs: Additional arguments which gets passed through.

    Notes:
        For non-pandas-like inputs, this is a no-op.
        Also, `args` and `kwargs` just get passed down to the underlying library as-is.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> import numpy as np
        >>> df_pd = pd.DataFrame(
        ...     {
        ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
        ...         "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
        ...     }
        ... )
        >>> df = nw.from_native(df_pd)
        >>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes  # doctest: +NORMALIZE_WHITESPACE
        a             Int32
        b           boolean
        dtype: object
    )r   r   r   r   rz   rw   r{   Zconvert_dtypesr   r~   rx   r   )ra   rD   r   r   r   r1   r1   r2   maybe_convert_dtypesr  s    


r   r"   )seriesr'   c                 C  s   ddl m} | jj}t| j|r:| j|jkr:| jjjd S | j|j	krJdS | j|jkrZdS t
| }t|rv|jjdkS t|r|jjS t|r|jjS t|r|jjS t|r|jjS dS )a  
    Return whether indices of categories are semantically meaningful.

    This is a convenience function to accessing what would otherwise be
    the `is_ordered` property from the DataFrame Interchange Protocol,
    see https://data-apis.org/dataframe-protocol/latest/API.html.

    - For Polars:
      - Enums are always ordered.
      - Categoricals are ordered if `dtype.ordering == "physical"`.
    - For pandas-like APIs:
      - Categoricals are ordered if `dtype.cat.ordered == True`.
    - For PyArrow table:
      - Categoricals are ordered if `dtype.type.ordered == True`.

    Examples:
        >>> import narwhals as nw
        >>> import pandas as pd
        >>> import polars as pl
        >>> data = ["x", "y"]
        >>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True))
        >>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical"))

        Let's define a library-agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return nw.is_ordered_categorical(s)

        Then, we can pass any supported library to `func`:

        >>> func(s_pd)
        True
        >>> func(s_pl)
        True
    r   )InterchangeSeriesZ
is_orderedTFZphysical)Znarwhals._interchange.seriesr   rx   Z_dtypesrI   ZdtypeZCategoricalr}   Zdescribe_categoricalr   r   r   Zorderingr   catZorderedr   r   r   rO   )r   r   ZdtypesZnative_seriesr1   r1   r2   is_ordered_categorical  s2    %

r   rY   z	list[str])n_bytescolumnsr'   c                 C  s   t dtdd t| |dS )Nz}Use `generate_temporary_column_name` instead. `generate_unique_token` is deprecated and it will be removed in future versions   )
stacklevelr   r   )r   DeprecationWarninggenerate_temporary_column_namer   r1   r1   r2   generate_unique_token  s    r   c                 C  sF   d}t | }||kr|S |d7 }|dkrd| d| }t|qdS )a  Generates a unique token of specified `n_bytes` that is not present in the given
    list of columns.

    It relies on [python secrets token_hex](https://docs.python.org/3/library/secrets.html#secrets.token_hex)
    function to return a string nbytes random bytes.

    Arguments:
        n_bytes: The number of bytes to generate for the token.
        columns: The list of columns to check for uniqueness.

    Returns:
        A unique token that is not present in the given list of columns.

    Raises:
        AssertionError: If a unique token cannot be generated after 100 attempts.

    Examples:
        >>> import narwhals as nw
        >>> columns = ["abc", "xyz"]
        >>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns
        True
    r   rE   d   zMInternal Error: Narwhals was not able to generate a column name with n_bytes=z and not in N)r   AssertionError)r   r   countertokenrT   r1   r1   r2   r     s    r   zIterable[str])compliant_framer   strictr'   c                 C  sT   t | j}t|}|r>|D ] }||krd| d}t|qnt|t |}|S )N"z" not found)setr   rJ   r   intersection)r   r   r   colsZto_dropdrT   r1   r1   r2   parse_columns_to_drop  s    
r   zTypeGuard[Sequence[Any]])sequencer'   c                 C  s   t | tot | t S r=   )rI   r	   r:   )r   r1   r1   r2   is_sequence_but_not_str&  s    r   )r'   c                  C  s   ddl } ddlm} ddl}t||jj}|  }d}zL|r| |}|	|snt
|jdd }r|	dr|j}|d7 }q:qq:W 5 ~X |S )z
    Find the first place in the stack that is not inside narwhals.

    Taken from:
    https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51
    r   N)PathZco_qualnamezsingledispatch.rE   )inspectpathlibr   Znarwhalsr:   __file__parentcurrentframegetfiler>   ry   f_codef_back)r   r   nwZpkg_dirframenfnamequalnamer1   r1   r2   find_stacklevel*  s(    


r   )message_versionr'   c                 C  s   t | tt d dS )z
    Issue a deprecation warning.

    Parameters
    ----------
    message
        The message associated with the warning.
    version
        Narwhals version when the warning was introduced. Just used for internal
        bookkeeping.
    )r   categoryr   N)r   r   r   )r   r   r1   r1   r2   issue_deprecation_warningR  s    r   zbool | None)r   pass_throughpass_through_defaultemit_deprecation_warningr'   c                C  sd   | d kr|d kr|}nJ| d k	rB|d krB|r:d}t |dd |  }n| d krT|d k	rTnd}t||S )Nz`strict` in `from_native` is deprecated, please use `pass_through` instead.

Note: `strict` will remain available in `narwhals.stable.v1`.
See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.
z1.13.0)r   z,Cannot pass both `strict` and `pass_through`)r   ru   )r   r   r   r   rT   r1   r1   r2   validate_strict_and_pass_thougha  s    r   )G
__future__r   rZ   enumr   r   Zsecretsr   typingr   r   r   r	   r
   r   warningsr   Znarwhals._exceptionsr   Znarwhals.dependenciesr   r   r   r   r   r   r   r   r   r   r   r   r   r   Znarwhals.translater   typesr   ZpandaspdZtyping_extensionsr   r   rl   r    rN   r"   r#   r$   r@   rC   rG   rL   rF   r`   rd   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r1   r1   r1   r2   <module>   st   &Y"//C
&(