o
    Eh                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlZddlZ	ddl
Z
ddlmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZ ddlZddlmZ dd	lmZ dd
lm Z m!Z!m"Z"m#Z#m$Z$ zddl%Z%dZ&W n e'yu   dZ&Y nw zddl(Z(dZ)W n e'y   dZ)Y nw e
j*+de
j*+dgZ,e
j-e
j.de
j*j/e) peddddkdde
j*j0e dddgde
j.de
j*j/e& dddgddd Z1e
j-dd Z2e
j-dd  Z3e
j-d!d" Z4e
j-d#d$ Z5e
j-d%d& Z6e
j-ej7ej8j9ej7ej8j:ej7ej8j;ej<d'd(ej<d)d(ej<d*d(ej<d+d(gdd,d- Z=									.dMd/d0Z>d1d2 Z?d3d4 Z@d5d6 ZAd7d8 ZBd9d: ZCd;d< ZDd=d> ZEd?d@ ZFdAdB ZGdCdD ZHG dEdF dFZIG dGdH dHeIZJG dIdJ dJeIZKG dKdL dLeIZLdS )Nz test parquet compat     N)Decimal)BytesIO)using_copy_on_writeusing_string_dtype)_get_option)is_platform_windows)pa_version_under11p0pa_version_under13p0pa_version_under15p0pa_version_under19p0pa_version_under20p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFz2ignore:DataFrame._data is deprecated:FutureWarningz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningfastparquetmode.data_managerZsilentarrayz4fastparquet is not installed or ArrayManager is usedreasonTODO(infer_string) fastparquetr   strictZmarkspyarrowpyarrow is not installed)paramsc                 C      | j S Nparamrequest r&   <lib/python3.10/site-packages/pandas/tests/io/test_parquet.pyengine@   s   r(   c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr&   r&   r&   r'   pa]   s   
r,   c                 C   sJ   t std ntddddkrtd t r#| tjjddd	 d
S )Nzfastparquet is not installedr   Tr   r   z.ArrayManager is not supported with fastparquetr   Fr   r   )_HAVE_FASTPARQUETr*   r+   r   r   applymarkermarkxfailr$   r&   r&   r'   fpd   s   
r1   c                   C   s"   t jg dddt ddgdS )N         fooABr8   r9   columns)pd	DataFrameIndexr&   r&   r&   r'   	df_compatq   s   "r?   c               
   C   sB   t tdttddtjddddg dt jd	d
dd} | S )Nabcr3            @      @float64dtypeTFT20130101r5   periods)abdef)r<   r=   listrangenparange
date_range)dfr&   r&   r'   df_cross_compatv   s   rV   c                   C   s   t tddtjdgg dg dg dttddtd	d
dtjdddddtjdgg dt jdd	dt 	dt j
t 	dgdS )Nr@   rK   crK   NrW   )   foo   bars   bazr6   barbazr3   rA   r5      Zu1rB   rC   rD   rE          @      @rG   rH   rI   Z20130103)stringstring_with_nanZstring_with_nonebytesZunicodeintZuintfloatZfloat_with_nanbooldatetimedatetime_with_nat)r<   r=   rP   rR   nanrQ   rS   astyperT   	TimestampZNaTr&   r&   r&   r'   df_full   s$   

rl   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   r    r!   r"   r$   r&   r&   r'   timezone_aware_date_list   s   rm   r4   c
                    s   pddip	i du r|r|d< |d<  fdd}
du rFt  |
|	 W d   dS 1 s?w   Y  dS |
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr(   c                    sZ   t | D ]&}jfi  tfi }dv r d jd< tj| d qd S )Nrb   r3   rb   )check_names
check_likecheck_dtype)rQ   r   r   loctmassert_frame_equal)repeat_Zactualrr   rq   rp   rU   expectedpathread_kwargswrite_kwargsr&   r'   compare   s   
z!check_round_trip.<locals>.compare)rt   ensure_clean)rU   r(   rz   r|   r{   ry   rp   rq   rr   rv   r}   r&   rx   r'   check_round_trip   s   "

"r   c                 C   s0   ddl m} |j| dd}|jjj|ksJ dS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NZhive)partitioning)Zpyarrow.datasetdatasetr   schemanames)rz   ry   Zdsr   r&   r&   r'   check_partition_names   s   
r   c                 C   sD   d}t jt|d t| dd W d    d S 1 sw   Y  d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr6   r\   )r*   raises
ValueErrorr   )r?   msgr&   r&   r'   test_invalid_engine  s   "r   c                 C   sP   |rt s| jd| _tdd t|  W d    d S 1 s!w   Y  d S )Nstrio.parquet.enginer   )r   r;   rj   r<   option_contextr   )r?   r,   using_infer_stringr&   r&   r'   test_options_py  s
   
"r   c                 C   :   t dd t|  W d    d S 1 sw   Y  d S )Nr   r   r<   r   r   )r?   r1   r&   r&   r'   test_options_fp     
"r   c                 C   r   )Nr   autor   )r?   r1   r,   r&   r&   r'   test_options_auto  r   r   c                 C   sP  t tdts	J t tdtsJ tdd# t tdts"J t tdts+J t tdts4J W d    n1 s>w   Y  tdd# t tdtsSJ t tdts\J t tdtseJ W d    n1 sow   Y  tdd$ t tdtsJ t tdtsJ t tdtsJ W d    d S 1 sw   Y  d S )Nr   r   r   r   )
isinstancer   r   r   r<   r   )r1   r,   r&   r&   r'   test_options_get_engine  s"   "r   c                  C   s  ddl m}  | d}| d}tsdnttjt|k }ts!dnttjt|k }to.| }to3| }|s|s|r\d| d}t	j
t|d td	 W d    n1 sVw   Y  nd
}t	j
t|d td	 W d    n1 stw   Y  |rd| d}t	j
t|d td	 W d    d S 1 sw   Y  d S d}t	j
t|d td	 W d    d S 1 sw   Y  d S d S d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.r   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr)   r   r   __version__r-   r   r*   r   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr   r&   r&   r'   "test_get_engine_auto_error_message2  sD   






"
"r   c                 C   s   | }t  0}|j||d d t||d}t || t||ddgd}t ||ddg  W d    d S 1 s9w   Y  d S N)r(   rn   r(   rK   rM   )r(   r;   rt   r~   r   r   ru   rV   r,   r1   rU   rz   resultr&   r&   r'   test_cross_engine_pa_fp]  s   
"r   c                 C   s   | }t  0}|j||d d t||d}t || t||ddgd}t ||ddg  W d    d S 1 s9w   Y  d S r   r   r   r&   r&   r'   test_cross_engine_fp_pak  s   
"r   c              	   C   s   t dg di}d}t 0}tjt|ddd |||  W d    n1 s*w   Y  W d    d S W d    d S 1 sBw   Y  d S )NrK   r2   zqStarting with pandas version 3.0 all arguments of to_parquet except for the argument 'path' will be keyword-only.F)r   Zcheck_stacklevelZraise_on_extra_warnings)r<   r=   rt   r~   assert_produces_warningFutureWarningr   )r(   rU   r   rz   r&   r&   r'   !test_parquet_pos_args_deprecationx  s   
"r   c                   @   s   e Zd Zdd Zdd ZdS )Basec              	   C   s|   t  0}tj||d t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s7w   Y  d S )Nr   rn   )rt   r~   r*   r   r   )selfrU   r(   excerr_msgrz   r&   r&   r'   check_error_on_write  s   
"zBase.check_error_on_writec              	   C   sx   t  .}t | t|||d d W d    n1 sw   Y  W d    d S W d    d S 1 s5w   Y  d S )Nr   )rt   r~   external_error_raisedr   )r   rU   r(   r   rz   r&   r&   r'   check_external_error_on_write  s   
"z"Base.check_external_error_on_writeN)__name__
__module____qualname__r   r   r&   r&   r&   r'   r     s    r   c                   @   s   e Zd Zdd Zdd Zejdg ddd Zd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zejd!g d"d#d$ Zejjejjd%d& Zd'S )(	TestBasicc                 C   sF   t g dddt dtg dfD ]}d}| ||t| qd S )Nr2   r3   r6   rH   z+to_parquet only supports IO with DataFrames)r<   Seriesrk   rR   r   r   r   )r   r(   objr   r&   r&   r'   
test_error  s   zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr@   r3   rA   ra   rd   r6   r\   r<   r=   rP   rQ   r;   r   )r   r(   rU   r&   r&   r'   test_columns_dtypes  s   
zTestBasic.test_columns_dtypesrn   )NgzipZsnappyZbrotlic                 C   s(   t dg di}t||d|id d S )Nr8   r2   rn   r|   r<   r=   r   )r   r(   rn   rU   r&   r&   r'   test_compression  s   zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr@   r3   rA   r   ra   r;   ry   r{   r<   r=   rP   rQ   r   )r   r(   rU   ry   r&   r&   r'   test_read_columns  s
   
zTestBasic.test_read_columnsc              	   C   sX   t ttdtdd}t dddgi}t||||ddgid	gdgd
dd d S )NrA   Zaabb)rd   partrd   r   r3   partition_colsr   )r   ==rK   )filtersr;   )rz   ry   r|   r{   rv   r   )r   r(   tmp_pathrU   ry   r&   r&   r'   test_read_filters  s   

zTestBasic.test_read_filtersc                 C   s   |dk}t dg di}t|| g dt jdddtdg d	g}|D ]}||_t|t jr7|jd |_t|||d
 q%g d|_d|j_	t|| d S )Nr   r8   r2   )r4   r5   rA   rH   r5   rI   r@   )r3   r5   rA   )rp   )r   r3   r4   r6   )
r<   r=   r   rT   rP   indexr   ZDatetimeIndex
_with_freqname)r   r(   rp   rU   Zindexesr   r&   r&   r'   test_write_index  s    

zTestBasic.test_write_indexc                 C   s:   |}t dg di}t jg d}||_t|| d S )Nr8   r2   )rK   r3   )rK   r4   )rL   r3   )r<   r=   
MultiIndexfrom_tuplesr   r   )r   r,   r(   rU   r   r&   r&   r'   test_write_multiindex  s
   zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjddt| dftdd}t j	j
d	d
g|gddgd}|jd d}||fD ]}||_t|| t||dddgi|ddg d q8d S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr4   r5   ABCr:   Level1Level2leveldate)r   r;   r8   r9   r{   ry   )r<   rT   r=   rR   randomdefault_rngstandard_normallenrP   r   Zfrom_productcopyr   r   )r   r,   r(   ZdatesrU   Zindex1Zindex2r   r&   r&   r'   test_multiindex_with_columns  s"   
z&TestBasic.test_multiindex_with_columnsc                 C   s   t g dg dd}d dd}|jdd}t||||d t jg dg ddg d	d
}t||||d g dg dg}t jttddd tdD d|d
}|jdd}t||||d d S )Nr2   )qrs)rK   rL   F)rn   r   T)dropr|   ry   )ZzyxZwvuZtsrr   r\   r\   r]   r]   r6   r6   quxr   onetwor   r   r   r   r   r      c                 S   s   g | ]}| qS r&   r&   ).0ir&   r&   r'   
<listcomp>  s    z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r<   r=   Zreset_indexr   rP   rQ   )r   r(   rU   r|   ry   arraysr&   r&   r'   test_write_ignoring_index  s    
 z#TestBasic.test_write_ignoring_indexc                 C   sb   t jg d}t jtjdd|d}|dkr$| ||t	d d S |dkr/t
|| d S d S )Nr   r4   )rA   r5   r:   r   Column name must be a stringr   )r<   r   r   r=   rR   r   r   r   r   	TypeErrorr   )r   r(   Z
mi_columnsrU   r&   r&   r'   test_write_column_multiindex  s   z&TestBasic.test_write_column_multiindexc                 C   sn   g dg dg}t jtjdd|d}ddg|j_|dkr*| ||t	d	 d S |d
kr5t
|| d S d S )Nr   )r3   r4   r3   r4   r3   r4   r3   r4   r4   r   r   r:   r   r   r   zColumn namer   )r<   r=   rR   r   r   r   r;   r   r   r   r   r   r(   r   rU   r&   r&   r'   &test_write_column_multiindex_nonstring-  s   z0TestBasic.test_write_column_multiindex_nonstringc                 C   sJ   |}g dg dg}t jtjdd|d}ddg|j_t|| d S )Nr   r   r4   r   r:   Z	ColLevel1Z	ColLevel2)	r<   r=   rR   r   r   r   r;   r   r   r   r,   r(   r   rU   r&   r&   r'   #test_write_column_multiindex_string>  s   z-TestBasic.test_write_column_multiindex_stringc                 C   s>   |}g d}t jtjdd|d}d|j_t|| d S )N)r\   r]   r6   r   r4   r   rA   r:   Z	StringCol)	r<   r=   rR   r   r   r   r;   r   r   r   r&   r&   r'   test_write_column_index_stringO  s   z(TestBasic.test_write_column_index_stringc                 C   sV   g d}t jtjdd|d}d|j_|dkr$| ||t	d d S t
|| d S )Nr3   r4   r5   rA   r4   r   r:   ZNonStringColr   r   )r<   r=   rR   r   r   r   r;   r   r   r   r   r   r&   r&   r'   !test_write_column_index_nonstring]  s   z+TestBasic.test_write_column_index_nonstringc           
      C   s  t d}|dkrt jjdd}|| ttg ddtg ddtg dtg d	tg d
dtg ddtg ddd}t	 }|
|| t||d}t||dd}W d    n1 slw   Y  |d jtdks}J ttjg dddtjg dddtjg dddtjg d	ddtjg d
ddtjg dddtjg dddd}	|dkr|jddd}|	jddd}	t||	 d S )Nzpyarrow.parquetr   z.Fastparquet nullable dtype support is disabledr   r3   r4   r5   NZint64Zuint8)rK   rL   rW   N)TFTNr   )      ?r_   r`   NZfloat32rD   )rK   rL   rW   rM   rN   rO   gr   numpy_nullabler(   dtype_backendrK   Int64rE   UInt8ra   booleanZFloat32Float64rW   r3   )Zaxis)r*   importorskipr/   r0   r.   r   tabler   rt   r~   write_tabler   rF   rR   r<   r=   r   ru   )
r   r(   r%   pqr/   r  rz   Zresult1Zresult2ry   r&   r&   r'   test_dtype_backendm  sH   


zTestBasic.test_dtype_backendrF   )	r   r   r   objectzdatetime64[ns, UTC]re   z	period[D]r  ra   c                 C   sT   t dt jg |di}d }|dkrt dt jg ddi}t||ddi|d d S )NvaluerE   re   r  r   r   r   )r<   r=   r   r   )r   r,   rF   rU   ry   r&   r&   r'   test_read_empty_array  s   
zTestBasic.test_read_empty_arrayc                 C   sv   |dkr	t | t|dddddd}|j| d t|j|d	}W d    n1 s.w   Y  t|| d S )
Nr   iodataparquetzsimple.parquetrb)mode)Zcontentr   )	r*   r  openZserve_contentreadr   Zurlrt   ru   )r   Z
httpserverdatapathr?   r(   rO   rU   r&   r&   r'   test_parquet_read_from_url  s   
z$TestBasic.test_parquet_read_from_urlN)r   r   r   r   r   r*   r/   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r  r	  Znetwork
single_cpur  r&   r&   r&   r'   r     s2    
		 1
r   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jje
jddgg gdd Zdd  Zd!d" Zd#d$ Zd%d& Ze
jjded'd( gd)d*gd+d,d- Zd.d/ Zd0d1 Z d2d3 Z!d4d5 Z"d6d7 Z#d8d9 Z$d:d; Z%d<d= Z&d>d? Z'd@dA Z(dBdC Z)e
jdDdEdFge
j*dGdHge
jje+e,dIdJdKe--dLdFdFdEdEe--dLdFdFdFdFggdMdN Z.dOdP Z/dQdR Z0dSdT Z1e
jje2dUddVdW Z3dXdY Z4dZS )[TestParquetPyArrowc                 C   s@   |}t jdddd}|d }||d< g d|d< t|| d S )NrH   r5   Europe/BrusselsrJ   tzdatetime_tzTNTbool_with_none)r<   rT   r   r   )r   r,   rl   rU   dtir&   r&   r'   
test_basic  s   
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
NrH   r5   r  r  r  ra   rd   r;   r   )r<   rT   r   )r   r,   rl   rU   r&   r&   r'   test_basic_subset_columns  s   


z,TestParquetPyArrow.test_basic_subset_columnsc                 C   sL   |j |d}t|tsJ t|}t|}| }d |jd< t|| d S )Nr   ro   )	r   r   rc   r   r   r   rs   rt   ru   )r   r,   rl   Z	buf_bytesZ
buf_streamresry   r&   r&   r'   *test_to_bytes_without_path_or_buf_provided  s   
z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   rA   r5   aaar:   zDuplicate column names found	r<   r=   rR   rS   ZreshaperP   r   r   r   r   r,   rU   r&   r&   r'   test_duplicate_columns  s   $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s&   t dt jdddi}t|| d S )NrK   1 dayr5   rI   )r<   r=   timedelta_ranger   r$  r&   r&   r'   test_timedelta     z!TestParquetPyArrow.test_timedeltac                 C   s&   t dg di}| ||tj d S )NrK   rK   r3   r_   )r<   r=   r   r   ArrowExceptionr$  r&   r&   r'   test_unsupported  s   z#TestParquetPyArrow.test_unsupportedc                 C   sH   t jddt jd}tj|dgd}tr| ||tj d S t	|| d S )Nr4   
   rE   fp16r  r;   )
rR   rS   float16r<   r=   r
   r   r   r+  r   )r   r,   r  rU   r&   r&   r'   test_unsupported_float16   s
   z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   zfloat16 works on 15	path_typec              	   C   s   t jddt jd}tj|dgd}t 2}||}ttj	 |j
||d W d    n1 s2w   Y  tj|r?J W d    d S 1 sJw   Y  d S )Nr4   r-  rE   r.  r/  )rz   r(   )rR   rS   r0  r<   r=   rt   r~   r   r   r+  r   osrz   isfile)r   r,   r2  r  rU   Zpath_strrz   r&   r&   r'    test_unsupported_float16_cleanup
  s   
"z3TestParquetPyArrow.test_unsupported_float16_cleanupc              	   C   sV   t t tdt jg dt g ddt jg dg dddd	}t|| d S )
NZabcdef)r\   r6   r6   r\   Nr\   r[   rE   )rK   rL   rW   rK   rW   rL   )rL   rW   rM   T)
categoriesZorderedrK   rL   rW   )r<   r=   CategoricalrP   ZCategoricalDtyper   r$  r&   r&   r'   test_categorical   s   z#TestParquetPyArrow.test_categoricalc                 C   s@   t d}|jdi |}d|i}t|||j d||d d S )Ns3fs
filesystem/pyarrow.parquetrz   r{   r|   r&   )r*   r  ZS3FileSystemr   r   )r   r?   s3_public_bucketr,   s3sor:  Zs3kwr&   r&   r'   test_s3_roundtrip_explicit_fs5  s   


z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s(   d|i}t ||d|j d||d d S )Nstorage_optionss3://r<  r=  r   r   )r   r?   r>  r,   r?  r&   r&   r'   test_s3_roundtripB  s   
z$TestParquetPyArrow.test_s3_roundtrippartition_colr8   c              
   C   sr   t d | }|r |t|tj}d}|| |||< t|||d|j	 dd|i|d |dddd	 d S )
Nr:  categoryrC  z/parquet_dirrB  )r   rn   rB  Tr3   )ry   rz   r{   r|   rq   rv   )
r*   r  r   rj   dictfromkeysrR   Zint32r   r   )r   r?   r>  r,   rF  r?  Zexpected_dfZpartition_col_typer&   r&   r'   test_s3_roundtrip_for_dirN  s*   

z,TestParquetPyArrow.test_s3_roundtrip_for_dirc                 C   sH   t d t }|| t|}|rts|jd|_t	|| d S )Nr   r   )
r*   r  r   r   r   r   r;   rj   rt   ru   )r   r?   r   bufferZdf_from_bufr&   r&   r'   test_read_file_like_obj_supportu  s   

z2TestParquetPyArrow.test_read_file_like_obj_supportc                 C   s   t d |dd |dd t jtdd td W d    n1 s'w   Y  t jtdd |d W d    d S 1 sDw   Y  d S )Nr   HOMEZTestingUserZUSERPROFILEz.*TestingUser.*r   z~/file.parquet)r*   r  Zsetenvr   OSErrorr   r   )r   r?   Zmonkeypatchr&   r&   r'   test_expand_user~  s   

"z#TestParquetPyArrow.test_expand_userc                 C   s>   ddg}|}|j ||d d t|| t|j|jksJ d S )Nrf   rd   r   rn   r   r   r   shape)r   r   r,   rl   r   rU   r&   r&   r'   test_partition_cols_supported  s
   
z0TestParquetPyArrow.test_partition_cols_supportedc                 C   s@   d}|g}|}|j ||d d t|| t|j|jksJ d S )Nrf   rP  rQ  )r   r   r,   rl   r   partition_cols_listrU   r&   r&   r'   test_partition_cols_string  s   
z-TestParquetPyArrow.test_partition_cols_stringc                 C   s   | S r!   r&   )xr&   r&   r'   <lambda>  s    zTestParquetPyArrow.<lambda>ra   zpathlib.Path)Zidsc           	      C   s<   d}|g}|}||}|j ||d t|j|jksJ d S )Nr9   )r   )r   r   rR  )	r   r   r,   r?   r2  r   rT  rU   rz   r&   r&   r'   test_partition_cols_pathlib  s   z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t jg g d}t|| d S )N)r   r;   r   r$  r&   r&   r'   test_empty_dataframe  s   z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   rV  r3   )typer   r   )	r   r<   r=   r   ZfieldZbool_rj   rf   r   )r   r,   r   rU   r   Zout_dfr&   r&   r'   test_write_with_schema  s
   
z)TestParquetPyArrow.test_write_with_schemac                 C   s   t d ttjg dddtjg dddtjg dddd}|r4tr4t|||d	d
id nt|| tdtjg dddi}t|| d S )Nr   r2   r   rE   ZUInt32rX   ra   r7  rW   r   ry   rK   r   )r*   r  r<   r=   r   r   r   rj   )r   r,   r   rU   r&   r&   r'    test_additional_extension_arrays  s   

z3TestParquetPyArrow.test_additional_extension_arraysc                 C   s   t d tdtjg dddi}td|4 |r5tr$|d}n	|d| d	}|jd|_n	|d| d	}t	|||d
 W d    d S 1 sPw   Y  d S )Nr   rK   rX   string[pyarrow]rE   string_storager   zstring[]r\  )
r*   r  r<   r=   r   r   r   rj   r;   r   )r   r,   r_  r   rU   ry   r&   r&   r'    test_pyarrow_backed_string_array  s   
"z3TestParquetPyArrow.test_pyarrow_backed_string_arrayc                 C   sV   t d ttjg dtjddddtjtjddddd}t	|| d S )	Nr   ))r   r3   )r3   r4   )r5   rA   z
2012-01-01r5   D)rJ   r   rA   )rW   rM   rN   )
r*   r  r<   r=   ZIntervalIndexr   period_rangeZfrom_breaksrT   r   r$  r&   r&   r'   test_additional_extension_types  s   

z2TestParquetPyArrow.test_additional_extension_typesc                 C   s4   d}t dt jddddi}t||d|id d S )	Nz2.6rK   z
2017-01-01Z1nsr-  r   rJ   versionr   )r<   r=   rT   r   )r   r,   ZverrU   r&   r&   r'   test_timestamp_nanoseconds  s   z-TestParquetPyArrow.test_timestamp_nanosecondsc           	      C   s   t dd d|g }tj|d|id}|d d  }tr#|jd|_|jtj	j
kr[zdd l}W n	 ty9   Y n"w |jj|}|| d }|j||_|d j||d< t||d	|d
 d S )Nr   11.0.0   index_as_colr   r  nsr   <   F)rr   ry   )r*   r  r<   r=   r   r   Zas_unitZtzinforg   timezoneutcpytzr   r  Z	utcoffsetZFixedOffsetZtotal_secondsZ
tz_convertZdtr   )	r   r,   rm   idxrU   ry   rp  offsetr  r&   r&   r'   test_timezone_aware_index  s"   
z,TestParquetPyArrow.test_timezone_aware_indexc                 C   sz   t d tdttdi}t }|j||d t	||dgd}W d    n1 s.w   Y  t
|dks;J d S )Nr   rK   r5   r   rK   r   r   r   r3   )r*   r  r<   r=   rP   rQ   rt   r~   r   r   r   )r   r,   rU   rz   r   r&   r&   r'   test_filter_row_groups  s   

z)TestParquetPyArrow.test_filter_row_groupsc                 C   s   t jtjddg dd}t }|j||d t	||}W d    n1 s+w   Y  |r?t
|jt jjjs=J d S t
|jt jjjsJJ d S )Nr4   )r-  r5   )r8   r9   Cr:   r   )r<   r=   rR   r   r   r   rt   r~   r   r   r   Z_mgrcoreZ	internalsZArrayManagerZBlockManager)r   r,   Zusing_array_managerrU   rz   r   r&   r&   r'   test_read_parquet_manager  s   
z,TestParquetPyArrow.test_read_parquet_managerc                 C   s   dd l }|}tjdddd}|d }||d< g d|d< |j|}|jtjd	}trO|d
 	d|d
< |d 	d|d< |d 	t|j
ddd|d< t||ddi|d d S )Nr   rH   r5   r  r  r  r  r  )Ztypes_mapperrg   ztimestamp[us][pyarrow]rh   us)Zunitr  r   r   r(   r{   ry   )r   r<   rT   r   ZTableZfrom_pandasZ	to_pandasZ
ArrowDtyper	   rj   Z	timestampr   )r   r,   rl   r   rU   r  Zpa_tablery   r&   r&   r'   &test_read_dtype_backend_pyarrow_config%  s,   

z9TestParquetPyArrow.test_read_dtype_backend_pyarrow_configc                 C   sn   t jdddgit jddgdddd	}| }d
d l}t|jtdkr+|jd|_t	||ddi|d d S )NrK   r3   r4   r5   rA   testr   zint64[pyarrow])r   rF   r   rh  r   r   r{  )
r<   r=   r>   r   r   r   r   r   rj   r   )r   r,   rU   ry   r   r&   r&   r'   ,test_read_dtype_backend_pyarrow_config_indexC  s   
z?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexr;   r   r3   rY   rZ   z*https://github.com/apache/arrow/pull/44171)r   r   r     c                 C   s2   t tdttddd}||_t|| d S )Nr@   r3   rA   r   r   )r   r,   r;   rU   r&   r&   r'   test_columns_dtypes_not_invalidS  s   z2TestParquetPyArrow.test_columns_dtypes_not_invalidc                 C   s(   t jt jg dddd}t|| d S )Nr7  custom namer~  r   r<   r=   r>   r   r$  r&   r&   r'   test_empty_columnsk  s   z%TestParquetPyArrow.test_empty_columnsc                 C   sR   |d }t jddgid}ddi|_|j||d t||d}|j|jks'J d S )Nztest_df_metadata.pr3   r  Ztest_attributer   )r<   r=   attrsr   r   )r   r   r,   rz   rU   Znew_dfr&   r&   r'   test_df_attrs_persistencep  s   
z,TestParquetPyArrow.test_df_attrs_persistencec           	      C   s   |d }t jdddgiddgd}|j||d t dd	 t||d}W d    n1 s/w   Y  t jtjd
}t jdddgi|t jddg|dt jdgt	rU|sUt
n|dd}t|| d S )Nztest_string_inference.prK   rV  yrL   )r  r   r   future.infer_stringTZna_valuerE   )r  rF   r   r;   )r<   r=   r   r   r   StringDtyperR   ri   r>   r   r  rt   ru   )	r   r   r,   r   rz   rU   r   rF   ry   r&   r&   r'   test_string_inferencex  s*   
z(TestParquetPyArrow.test_string_inferenceznot supported before 11.0c                 C   s   dd l }|d }tjdtdgidd}|j||d|dfgd t|}tr5tjdd	gid
d}ntjdtdgidd}t	
|| d S )Nr   z	decimal.prK   z123.00r^  rE   ri  )r   Z123zstring[python]r  )r   r<   r=   r   r   r   Z
decimal128r   r   rt   ru   )r   r   r,   rz   rU   r   ry   r&   r&   r'   test_roundtrip_decimal  s    z)TestParquetPyArrow.test_roundtrip_decimalc                 C   s   dd l }dd lm} |d }|d|g d| i}||| tdd t	|}W d    n1 s8w   Y  tj
dg ditjtjdtjdgtjtjddd	}t|| d S )
Nr   zlarge_string.prK   )NrL   rW   r  Tr  rE   )r  rF   r;   )r   Zpyarrow.parquetr  r  r   Zlarge_stringr  r<   r   r   r=   r  rR   ri   r>   rt   ru   )r   r   r,   r  rz   r  r   ry   r&   r&   r'   #test_infer_string_large_string_type  s   

z6TestParquetPyArrow.test_infer_string_large_string_typeN)5r   r   r   r  r  r   r%  r(  r,  r1  r*   r/   r0   r   skipifr
   r  r   pathlibPathr5  r9  r  rA  rE  rJ  rL  rO  rS  rU  rX  rY  r[  r]  ra  rd  rg  rs  rv  ry  r|  r  r#   r   NotImplementedErrorrg   r  r  r  r  r   r  r  r&   r&   r&   r'   r    s    


			
!		

r  c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jjdd Zdd Zdd Zdd Zdd Ze
jje dddd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Ze
jje ddd-d. Zd/S )0TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	NrH   r5   z
US/Easternr  r  r&  rI   Z	timedelta)r<   rT   r   r'  r   )r   r1   rl   rU   r  r&   r&   r'   r    s   
z!TestParquetFastParquet.test_basicc                 C   s   t tdttddd}t}d}ddg|_| |||| ddg|_| |||| td	ddddtd	ddddg|_| |||| d S )
Nr@   r3   rA   r   r   r   rY   rZ   r  )r<   r=   rP   rQ   r   r;   r   rg   )r   r1   rU   errr   r&   r&   r'   test_columns_dtypes_invalid  s   

z2TestParquetFastParquet.test_columns_dtypes_invalidc                 C   s<   t jtdddtdd }d}| ||t| d S )Nr!  rA   r5   r"  r:   z9Cannot create parquet dataset with duplicate column namesr#  r   r1   rU   r   r&   r&   r'   r%    s   $z-TestParquetFastParquet.test_duplicate_columnsc                 C   s   dd l }t|jtdk r ttjtdkr |tjjdd t	dg di}tj	ddtj
d	gid
d}t|||dd d S )Nr   	2024.11.0z2.0.0z$fastparquet uses np.float_ in numpy2r   rK   )TNFr   g        r0  rE   F)ry   rr   )r   r   r   rR   r.   r*   r/   r0   r<   r=   ri   r   )r   r1   r%   r   rU   ry   r&   r&   r'   test_bool_with_none  s   z*TestParquetFastParquet.test_bool_with_nonec                 C   sT   t dt jddddi}| ||td  t dg di}d}| ||t| d S )NrK   Z2013Mr5   re  r*  z"Can't infer object conversion type)r<   r=   rc  r   r   r  r&   r&   r'   r,    s
   z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )NrK   r@   )r<   r=   r8  rP   r   )r   r1   rU   r&   r&   r'   r9    r)  z'TestParquetFastParquet.test_categoricalc                 C   sx   dt tdi}t|}t }|j||d dd t||dgd}W d    n1 s-w   Y  t|dks:J d S )NrK   r5   r3   )r(   rn   Zrow_group_offsetsrt  ru  )	rP   rQ   r<   r=   rt   r~   r   r   r   )r   r1   rM   rU   rz   r   r&   r&   r'   rv    s   

z-TestParquetFastParquet.test_filter_row_groupsc                 C   s*   t ||d|j dd|id |dd d S )NrC  z/fastparquet.parquetrB  )rn   rB  r=  rD  )r   r?   r>  r1   r?  r&   r&   r'   rE  	  s   
z(TestParquetFastParquet.test_s3_roundtripc                 C   s\   ddg}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks,J d S )Nrf   rd   r   r(   r   rn   r   Fr4   	r   r3  rz   existsr   ZParquetFiler   Zcatsr   r   r   r1   rl   r   rU   r   Zactual_partition_colsr&   r&   r'   rS       z4TestParquetFastParquet.test_partition_cols_supportedc                 C   sX   d}|}|j |d|d d tj|sJ dd l}|t|dj}t|dks*J d S )Nrf   r   r  r   Fr3   r  r  r&   r&   r'   rU  $  s   z1TestParquetFastParquet.test_partition_cols_stringc                 C   s\   ddg}|}|j |dd |d tj|sJ dd l}|t|dj}t|dks,J d S )Nrf   rd   r   )r(   rn   partition_onr   Fr4   r  r  r&   r&   r'   test_partition_on_supported4  r  z2TestParquetFastParquet.test_partition_on_supportedc                 C   sX   ddg}|}d}t jt|d |j|dd ||d W d    d S 1 s%w   Y  d S )Nrf   rd   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datar   r   )r(   rn   r  r   )r*   r   r   r   )r   r   r1   rl   r   rU   r   r&   r&   r'   3test_error_on_using_partition_cols_and_partition_onD  s   "zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onzfastparquet writes into Indexr   c                 C   s"   t  }| }t|||d d S )Nr\  )r<   r=   r   r   r   r1   rU   ry   r&   r&   r'   rY  W  s   z+TestParquetFastParquet.test_empty_dataframec                 C   s~   dd l }t|jtdkr t|jtdk r |tjjdd d|g }tj|d|id}|	 }d	|j
_t|||d
 d S )Nr   z2022.12r  zCfastparquet bug, see https://github.com/dask/fastparquet/issues/929r   ri  rj  rk  r   r\  )r   r   r   r.   r*   r/   r0   r<   r=   r   r   r   r   )r   r1   rm   r%   r   rq  rU   ry   r&   r&   r'   rs  ^  s    
	z0TestParquetFastParquet.test_timezone_aware_indexc              
   C   s   t dddgi}t g}|| tjtdd$ tt	 t
|ddd W d    n1 s2w   Y  W d    n1 sAw   Y  tjtdd t
|dd	d
 W d    n1 s_w   Y  W d    d S W d    d S 1 sww   Y  d S )NrK   r3   r4   z!not supported for the fastparquetr   r   T)r(   Zuse_nullable_dtypesr   r   )r<   r=   rt   r~   r   r*   r   r   r   r   r   )r   r1   rU   rz   r&   r&   r'   &test_use_nullable_dtypes_not_supportedu  s   

"z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              	   C   s   t d7}t|d tjtdd t|dd W d    n1 s&w   Y  t|j	dd W d    d S 1 s?w   Y  d S )	Ntest.parquets   breakit r   r   r   F)Z
missing_ok)
rt   r~   r  r  write_bytesr*   r   	Exceptionr   unlink)r   rz   r&   r&   r'   $test_close_file_handle_on_read_error  s   "z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              	   C   s   t jddgddgdd}td*}t| d}|| W d    n1 s*w   Y  t||d}W d    n1 s?w   Y  t|| d S )Nr   r3   r7   r  r  wbr   )	r<   r=   rt   r~   r  encoder   r   ru   )r   r(   rU   rz   rO   r   r&   r&   r'   test_bytes_file_name  s   z+TestParquetFastParquet.test_bytes_file_namec              	   C     t d tjddgddgdd}t '}t jtdd |j|ddd	 W d    n1 s1w   Y  W d    n1 s@w   Y  t 7}t	
|d
 t jtdd t|ddd	 W d    n1 skw   Y  W d    d S W d    d S 1 sw   Y  d S )Nr   r   r3   r7   r  zfilesystem is not implementedr   r6   r(   r;  rY   )r*   r  r<   r=   rt   r~   r   r  r   r  r  r  r   r   rU   rz   r&   r&   r'   test_filesystem_notimplemented  (   


"z5TestParquetFastParquet.test_filesystem_notimplementedc              	   C   r  )Nr   r   r3   r7   r  z1filesystem must be a pyarrow or fsspec FileSystemr   r6   r  rY   )r*   r  r<   r=   rt   r~   r   r   r   r  r  r  r   r  r&   r&   r'   test_invalid_filesystem  r  z.TestParquetFastParquet.test_invalid_filesystemc              	   C   s(  t d}tjddgddgdd}t ,}t jtdd |j|d|	 d	d
id W d    n1 s6w   Y  W d    n1 sEw   Y  t <}t
|d t jtdd t|d|	 d	d
id W d    n1 suw   Y  W d    d S W d    d S 1 sw   Y  d S )Nz
pyarrow.fsr   r3   r7   r  z8storage_options not supported with a pyarrow FileSystem.r   r   r6   r\   )r(   r;  rB  rY   )r*   r  r<   r=   rt   r~   r   r  r   ZLocalFileSystemr  r  r  r   )r   Zpa_fsrU   rz   r&   r&   r'   .test_unsupported_pa_filesystem_storage_options  s@   


"zETestParquetFastParquet.test_unsupported_pa_filesystem_storage_optionsc              	   C   s   d}t dttddi}td3}|| tjt	|d t
|dd W d    n1 s1w   Y  W d    d S W d    d S 1 sIw   Y  d S )	NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.rd   r3   rA   ztmp.parquetr   numpy)r   )r<   r=   rP   rQ   rt   r~   r   r*   r   r   r   )r   r(   r   rU   rz   r&   r&   r'   test_invalid_dtype_backend  s   
"z1TestParquetFastParquet.test_invalid_dtype_backendc                 C   sF   t jt jg dddd}t jt jg dddd}t|||d d S )Nr7  r  r~  r   r\  r  r  r&   r&   r'   r    s   z)TestParquetFastParquet.test_empty_columnsN)r   r   r   r  r  r%  r  r,  r9  rv  r*   r/   r  rE  rS  rU  r  r  r  r   rY  rs  r  r  r  r  r  r  r  r  r&   r&   r&   r'   r    s4    	



r  )	NNNNNTFTr4   )M__doc__rg   Zdecimalr   r
  r   r3  r  r  rR   r*   Zpandas._configr   r   Zpandas._config.configr   Zpandas.compatr   Zpandas.compat.pyarrowr   r	   r
   r   r   Zpandasr<   Zpandas._testingZ_testingrt   Zpandas.util.versionr   Zpandas.io.parquetr   r   r   r   r   r   r)   r   r   r-   r/   filterwarningsZ
pytestmarkZfixturer#   r  r0   r(   r,   r1   r?   rV   rl   Znowrn  ro  minmaxstrptimerm   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r&   r&   r&   r'   <module>   s    
	







B	+  5   u