o
    EhN                     @   sH  d dl m Z mZ d dlZd dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlZd dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ ejd
d Zejdd Zejdddgdd Zdd Z dd Z!dd Z"ejdd dde#ddgfdde#ddgfdddgfd dde#dgfd ddgfdde#dgfgdd  Z$ejdd!d" d#d" d$d" d%d" d&d" gd'd( Z%d)d* Z&ejdg d+g d,d-g d.g d,d-e'g d/e'g d.e'g d,d0gd1d2 Z(d3d4 Z)d5d6 Z*d7d8 Z+d9d: Z,ejj-ed;d<d=d> Z.ejdi d?g igd@dA Z/ddCdDZ0ddEdFZ1dGdH Z2ejdIdJdKgejdLg dMdNdO Z3dPdQ Z4ddRdSZ5dTdU Z6dVdW Z7dXdY Z8dZd[ Z9ejd\g d]d^d_fg d]d`d_fg d]dadbfg d]dcdbfg d]dddefg d]dfdefg dgdhdifg dgdjdifg dkdldmfg dkdndmfg doej:ej;dpdqfg dodrdqfe dsdde dsdddgdtdufe dsdde dsdddgdvdwfe dsddej<dxe dsddej<dxdgdydzfgde=d{e>d|e>dBdfd}d~Z?ejd\g dd^d_fg dd`d_fg ddadbfg ddcdbfg ddddefg ddfdefg ddhdifg ddjdifg ddldmfg ddndmfg dej:ej;dpdqfg ddrdqfe dsdde dsdde dsddgdtdufe dsdde dsdde dsddgdvdwfe dsddej<dxe dsddej<dxe dsddej<dxgdydzfgde=d{e>d|e>dBdfddZ@dddZAdddZBdd ZCdd ZDdS )    )datetimetimezoneN)iNaT)is_ci_environmentis_platform_windows)np_version_lt1p23)PandasColumn)ColumnNullType	DtypeKindfrom_dataframe)ArrowCTypesc                   C   s.   t jtdd ddt jtdd dddS )NZtestdata   T)orderedF)r   	unordered)pdCategoricallist r   r   Blib/python3.10/site-packages/pandas/tests/interchange/test_impl.pydata_categorical   s   r   c                   C   s   dddddt jgiS )Nseparator datazabC|DeF,Hikz234,3245.67zgSaf,qWer|Grez
asd3,4sad|)npnanr   r   r   r   string_data#   s   r   data)r   T)r   Fc                 C   s   t d|| d  i}| d}|jd tjksJ |jdks#J |jt	j
dfks-J | dks5J |j}|d | d ksBJ |d du sJJ t|d tsSJ t|d jt g d	 t|t|  d S )
NAr      Z
is_orderedZis_dictionaryT
categories)adest)r   	DataFrame__dataframe__get_column_by_namedtyper
   ZCATEGORICAL
null_countdescribe_nullr	   USE_SENTINELZ
num_chunksZdescribe_categorical
isinstancer   tmZassert_series_equalZ_colSeriesassert_frame_equalr   )r   r   dfcolZdesc_catr   r   r   test_categorical_dtype0   s   r2   c                  C   sl   t dd} g d}| d| | i}| }t|}tj|g dd}t	d|i}t
|| d S )Npyarrow11.0.0)	MonTuer5   Wedr5   ThuFriSatSunweekday)r5   r6   r7   r8   r9   r:   r;   )r   )pytestimportorskiptablearraydictionary_encoder&   r   r   r   r%   r-   r/   )paarrr?   exchange_dfresultr<   expectedr   r   r   test_categorical_pyarrowD   s   rG   c                  C   sj   t dd} d g}| d| |d i}| }tjj	|}t
dttjgi}t|| d S )Nr3   r4   rC   float64)r=   r>   r?   r@   rA   r&   r   apiinterchanger   r%   r   r   r   r-   r/   rB   rC   r?   rD   rE   rF   r   r   r   test_empty_categorical_pyarrowS   s   rL   c                  C   sv   t dd} ddg}| d| |di}| }t|}tdddgi}t	|| | j
| j||s9J d S )Nr3   r4   r5   r6   r<   large_string)r=   r>   r?   r@   r&   r   r   r%   r-   r/   TableequalsrJ   rK   r   r   r   test_large_string_pyarrow_   s   rP   )offsetlengthexpected_valuesffffff
@r    @r      c           	      C   sr   t dd}g d}|d|i| |}| }t|}td|i}t	|| |j
|j||s7J d S )Nr3   r4   )rT   NrU   rC   )r=   r>   r?   slicer&   r   r   r%   r-   r/   rN   rO   rJ   )	rQ   rR   rS   rB   rC   r?   rD   rE   rF   r   r   r   test_bitmasks_pyarrown   s   rX   c                   C      t jdddS )NrV   id   r   randomdefault_rngintegersr   r   r   r   <lambda>       r_   c                   C   rY   )NrV   r   rZ   r[   r   r   r   r   r_      r`   c                   C   s   t jd S )NrV   )r   r\   r]   r   r   r   r   r_      s    c                   C   s   t jdddgS )NrV   TF)r   r\   r]   choicer   r   r   r   r_      s    c                   C   s>   t tjdddtjdddtjddddS )NrV   il  i4  r         )yearmonthday)r   r   r\   r]   r^   r   r   r   r   r_      s
    c                    s   d\  fddt  D t}| }|  ks"J | ks*J t| t ks8J d}t	fdd|D }t
||}t
||}t|| t|jd tscJ t|jd tsmJ d S )N)
   rc   c                    s>   i | ]}d t | d    d  fddtD qS )r1   rV   r   c                    s   g | ]}  qS r   r   ).0_r   r   r   
<listcomp>   s    z-test_dataframe.<locals>.<dictcomp>.<listcomp>)intrange)rh   iZNCOLSZNROWSr   r   r   
<dictcomp>   s    0z"test_dataframe.<locals>.<dictcomp>)r   rV   c                 3   s     | ]}t   | V  qd S N)r   keys)rh   idxrj   r   r   	<genexpr>   s    z!test_dataframe.<locals>.<genexpr>Z_INTERCHANGE_PROTOCOL_BUFFERS)rm   r   r%   r&   Znum_columnsZnum_rowsr   column_namesrr   tupler   Zselect_columnsZselect_columns_by_namer-   r/   r,   attrs)r   r0   df2indicesnamesrE   rF   r   ro   r   test_dataframe   s    
r{   c                     s   t tg dtg dtg dd tjd fdd jD } |  D ]\}} jj	t
t |dd	 }d  j||f< q-  }|d
j| d
 ksYJ |dj| d kseJ |dj| d ksqJ d S )N)      ?       @g      @g      @        )      ?      @      @g      @r   )r|   r~   r|   r|   r|   xyzrV   c                    s    i | ]}|j d t dqS )r   )ZlowZhigh)r^   len)rh   r1   r0   rngr   r   rp      s     z,test_missing_from_masked.<locals>.<dictcomp>F)sizereplacer   r   r   )r   r%   r   r@   r\   r]   columnsitemsindexra   Zaranger   locr&   r'   r)   )Z	dict_nullr1   Z	num_nullsZnull_idxrx   r   r   r   test_missing_from_masked   s"   r   )r   r   r   )ffffff"@      %@皙'@)r   r   )r   rV   r   )TTFr   c                 C   s6   t | }| }|jD ]}||jdksJ qd S )Nr   )r   r%   r&   r   r'   r)   )r   r0   rx   col_namer   r   r   test_mixed_data   s
   

r   c                  C   s\   t tg dtg dtg dd} |  }| jD ]}||jdks+J qd S )N)TNFNT)NrV   Nr   rV   )r   r   Nr   Nr   rV   )r   r%   r   r@   r&   r   r'   r)   )r0   rx   r   r   r   r   test_mixed_missing   s   
r   c                 C   s   | d dg }t d|i}| d}| dksJ |jdks$J |jd tjks.J |j	t
jdfks8J |dd  }| d}| dksMJ |jdksTJ |jd tjks^J |j	t
jdfkshJ d S )Nr    r      r   r      )r   r%   r&   r'   r   r)   r(   r
   STRINGr*   r	   ZUSE_BYTEMASK)r   Ztest_str_datar0   r1   Z	df_slicedr   r   r   test_string   s   r   c                  C   sZ   t dg di} |  d}tjtdd |j W d    d S 1 s&w   Y  d S )Nr   )r    rg   r|   r   znot supported yetmatch)r   r%   r&   r'   r=   raisesNotImplementedErrorr(   r0   r1   r   r   r   test_nonstring_object   s
   "r   c                  C   s   t dt dt jgi} |  d}| dksJ |jdks#J |jd t	j
ks-J |jtjtfks7J t| t|   d S )Nr   z
2022-01-01rV   r   r   )r   r%   	TimestampNaTr&   r'   r   r)   r(   r
   ZDATETIMEr*   r	   r+   r   r-   r/   r   r   r   r   r   test_datetime  s   r   zNumpy > 1.23 required)reasonc                  C   s^   t dt g di} |  d}t| d d }tjg ddd}t	
|| d S )Nr   )r    br    r   r   )r   r   r   int8r(   )r   r%   r   r&   r'   r   Zfrom_dlpackget_buffersr@   r-   Zassert_numpy_array_equal)r0   r1   rE   rF   r   r   r    test_categorical_to_numpy_dlpack  s
   r   r    c                 C   sF   t dd ddlm} t| }||}t|}tj||dd d S )Nr3   r4   r   r   F)Zcheck_column_type)r=   r>   pyarrow.interchanger   r   r%   r-   r/   )r   Zpa_from_dataframerF   Zarrow_dfrE   r   r   r   test_empty_pyarrow  s   
r   returnc                  C   s~   t dd} | g dg dg}dg}| j|g|d}t jtdd tjjj	|d	d
 W d    d S 1 s8w   Y  d S )Nr3   r4   )rV   rV      )r   r   rZ   n_legs)rz   z@Cannot do zero copy conversion into multi-column DataFrame blockr   FZ
allow_copy)
r=   r>   Zchunked_arrayr?   r   RuntimeErrorr   rI   rJ   r   )rB   r   rz   r?   r   r   r   test_multi_chunk_pyarrow&  s   "r   c                  C   s   t dd tjg ddd} tj| | gddd}| }t jtd	d
 tj	j
|jdd W d    n1 s<w   Y  tj	j
|jdd}tjdg didd}t|| t|| t|d jjjdkspJ t|d jjjdks}J d S )Nr3   r4   r   rV   NInt64[pyarrow]r   T)Zignore_indexr    z:Found multi-chunk pyarrow array, but `allow_copy` is Falser   Fr   )r|   r}   Nr|   r}   NrH   rV   )r=   r>   r   r.   concatto_framecopyr   r   rI   rJ   r   r&   r%   r-   r/   r   r@   Z	_pa_arraychunks)Zserr0   Zdf_origrE   rF   r   r   r   test_multi_chunk_column2  s   r   c                  C   s|   t dd ddddddd} tjtd	i | gddd }| }tjj	|d 
 }tjd	i | }||ks<J d S )
Nr3   r4   i  r   )rd   re   rf   ZhourZminutesecondtimestamp[ns][pyarrow]Zcol0)r(   namer   )r=   r>   r   r.   r   r   r&   rI   rJ   r   itemr   )Ztimestamp_argsr0   dfirE   rF   r   r   r   test_timestamp_ns_pyarrowG  s&   r   tzZUTCz
US/Pacificunit)r#   msusnsc                 C   sB   t jdddd| |}t d|i}t|t|  d S )Nz
2018-01-01r   D)ZperiodsZfreqZts_tz)	r   Z
date_rangeZtz_localizeZas_unitr%   r-   r/   r   r&   )r   r   Ztz_datar0   r   r   r   test_datetimetzdtype_  s   r   c           	      C   s   t dd}dd lm} t r t r t jj|jdd}| 	| |
tdddd tdddg}||d	}|d
|i}| }t|}tjg dd
gdd}t|| d S )Nr3   r4   r   z_TODO: Set ARROW_TIMEZONE_DATABASE environment variable on CI to path to the tzdata for pyarrow.)r   r     r   rV   Asia/KathmandurC   )z2020-01-01 00:00:00+05:45r   z2020-01-02 00:00:00+05:45zdatetime64[us, Asia/Kathmandu])r   r(   )r=   r>   Zpyarrow.computeZcomputer   r   markZxfailZArrowInvalidZapplymarkerr@   r   Zassume_timezoner?   r&   r   r   r%   r-   r/   )	ZrequestrB   Zpcr   rC   r?   rD   rE   rF   r   r   r   )test_interchange_from_non_pandas_tz_awarej  s&   
 r   c                    s   t dddgi }| d   d }|d }tjdtj|d f}|d |f d<  fd	d
_fdd
_| 	|dfdd
 t j
j| d S )Nr    ZfooZbarr   r         r   c                          S rq   r   r   )buffersr   r   r_         z?test_interchange_from_corrected_buffer_dtypes.<locals>.<lambda>c                    r   rq   r   )ri   )columnr   r   r_     r   r&   c                    r   rq   r   r   )rJ   r   r   r_     r   )r   r%   r&   r'   r   r
   ZUINTr   ZUINT8setattrrI   rJ   r   )Zmonkeypatchr0   Zbuffers_dataZbuffer_dtyper   )r   r   rJ   r   -test_interchange_from_corrected_buffer_dtypes  s    
r   c                  C   s8   t jdg itd} |  }t jj|}t| | d S )Nr    r   )	r   r%   strr&   rI   rJ   r   r-   r/   )r0   rx   rE   r   r   r   test_empty_string_column  s   r   c                  C   sT   t d tjddgidd} tjj|  }tjddgidd}t	|| d S )Nr3   r    r   large_string[pyarrow]r   r   )
r=   r>   r   r%   rI   rJ   r   r&   r-   r/   )r0   rE   rF   r   r   r   test_large_string  s
   
r   c                  C   s4   t jg ddd } |   }|dgksJ d S )Nr   rV   r   r   )r   0)r   r.   r   r&   ru   )r0   rz   r   r   r   test_non_str_names  s   r   c                  C   sf   t g dg dd} |  }tjtdd t jjj|dd W d    d S 1 s,w   Y  d S )Nr   r   r   r   )r   r   a&  Expected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: Index\(\['0', '0'\], dtype='(str|object)'\). Please rename these columns before using the interchange protocol.r   Fr   )	r   r%   r&   r=   r   	TypeErrorrI   rJ   r   )r0   r   r   r   r   test_non_str_names_w_duplicates  s   
"r   )r   r(   expected_dtyper   ZInt64Zint64r   ZInt8r   zInt8[pyarrow]ZUInt64Zuint64zUInt64[pyarrow])r|         @NZFloat32Zfloat32zFloat32[pyarrow])TFNZbooleanboolzboolean[pyarrow])much adoaboutN)Zna_valuerM   zstring[pyarrow]r   r   ztimestamp[ns]ztimestamp[us][pyarrow]ztimestamp[us])Ztzinfoz&timestamp[us, Asia/Kathmandu][pyarrow] timestamp[us, tz=Asia/Kathmandu]r(   r   c                 C   s   t dd}dd lm} |dkr|dd}tjd| i|d}|| d }|j	|ks/J |d 
 | d ks;J |d	 
 | d	 ksGJ |d
 
 d u sQJ d S Nr3   r4   r   r   r   r   r    r   r   rV   r=   r>   r   rJ   Z	timestampr   r%   r   r&   typeZas_pyr   r(   r   rB   Zpair0   rE   r   r   r   (test_pandas_nullable_with_missing_values  s   1r   r   )r|   r   g      @)TFF)r   r   Znothingr   c                 C   s   t dd}dd lm} |dkr|dd}tjd| i|d}|| d }|j	|ks/J |d 
 | d ks;J |d	 
 | d	 ksGJ |d
 
 | d
 ksSJ d S r   r   r   r   r   r   +test_pandas_nullable_without_missing_values  s   4r   c                  C   sF   t dd tjddgidd} |  d d }|d u s!J d S )Nr3   r4   r    r   r   r   validity)r=   r>   r   r%   r&   r'   r   )r0   rE   r   r   r   test_string_validity_bufferF  s   r   c                  C   sl   t dd tjddd gidd} |  d d }|d us"J |d }tjdt	jd	f}||ks4J d S )
Nr3   r4   r    r   r   r   r   r   =)
r=   r>   r   r%   r&   r'   r   r
   ZBOOLr   )r0   r   rE   rF   r   r   r   &test_string_validity_buffer_no_missingN  s   r   c                  C   sN   t jdg idd} |  }t jjj|dd}t jdg idd}t|| d S )Nr    r   r   Fr   )r   r%   r&   rI   rJ   r   r-   r/   )r0   r   rE   rF   r   r   r   test_empty_dataframeY  s
   r   c                  C   sL   t dd} dddgg dgi}| |}t|}t|}t|| d S )Nr3   z14.0.0r    r   rV   r   )r=   r>   r?   r   r   r%   r-   r/   )rB   r   ZtblrE   rF   r   r   r   test_from_dataframe_list_dtypeb  s   

r   )r   N)Er   r   Znumpyr   r=   Zpandas._libs.tslibsr   Zpandas.compatr   r   Zpandas.compat.numpyr   Zpandasr   Zpandas._testingZ_testingr-   Zpandas.core.interchange.columnr   Z*pandas.core.interchange.dataframe_protocolr	   r
   Z&pandas.core.interchange.from_dataframer   Zpandas.core.interchange.utilsr   Zfixturer   r   r   Zparametrizer2   rG   rL   rP   floatrX   r{   r   r@   r   r   r   r   r   Zskipifr   r   r   r   r   r   r   r   r   r   r   r   ZStringDtyper   Zutcr   r   r   r   r   r   r   r   r   r   r   r   <module>   sZ   








	


	
	,  0

	