o
    Ehs                     @   s  d dl m Z mZ d dlmZ d dlZd dlZd dlmZ d dl	m
Z
mZmZmZmZmZ d dlmZ d dlmZ d dlmZ ejdd d	ed
dggdd Zdd Zdd Zdd Zdd Zdd Zejdddgddggdd Zdd Z ejdd g d!fd"g d#fd$g d%fd&g d%fd'g d(fd)g d*fd+g d,fd-g d.fgd/d0 Z!ejdd&g d1fd2g d3fd$g d4fgd5d6 Z"ej#d7ejdd&d8ej$d	d8ej$d	d8gfd2d8ej$d8d8ej$d	d8gfgd9d: Z%d;d< Z&d=d> Z'd?d@ Z(dAdB Z)ejdCdDdEddg dFfdGdEddg dHfdDdEdIdg dJfdGdEdIdg dHfdDdKddLg dHfdGdKd dMg dNfgdOdP Z*dQdR Z+ejdSdDdGgdTdU Z,ejdVdDg dWgdGg dXggdYdZ Z-d[d\ Z.ejd]d^dMdd
dej$d_gfd dId`dadaej$dagfddd`dbdcej$ddgfdd^d`dedfej$dggfdIdhd^didjej$dkgfdId d`dldmej$dmgfgdndo Z/ejd]d^dMdd
ej$dej$ej$dej$ej$gfddpd`dqej$drej$ej$dej$ej$gfgdsdt Z0ejdud^dIddvdwdxdaej$gfd^dIdydzd{d|dyej$gfd^d^dyd}d~ddyej$gfd^dpdyd}d~ddyej$gfd`ddyddddyej$gfdddyddddyej$gfddLdyddddyej$gfddIdydd{ddyej$gfgdd Z1ejdVdddej$dggdddej$dggdddej$dgggdd Z2ejdVddej$dggddej$dggddej$dgggdd Z3ejdVdg dgdg dgdg dggdd Z4ejddg dfdg dfgdd Z5ejddg dfdg dfgdd Z6dd Z7dd Z8dd Z9dd Z:dd Z;dd Z<dd Z=dd Z>ddĄ Z?ejdddddej$dgfddddej$dgfgddτ Z@ddф ZAddӄ ZBejdddgdfg d֢dfg dآdfg dڢdfde ddpdpgdfgdd݄ ZCejddpej$gdfe ddpdpgdfedpgdfgdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZLdd ZMdd ZNdd ZOdS )    )datetime	timedelta)PathN)pa_version_under21p0)NA	DataFrameIndex
MultiIndexSeriesStringDtype)StringMethods)is_object_or_nan_string_dtypepatternTfoobarc                 C   s   t ddg}dt| j }tjt|d |j|  W d    n1 s&w   Y  tjt|d |j|  W d    d S 1 sDw   Y  d S )Nr   r   z expected a string or tuple, not match)	r
   type__name__pytestraises	TypeErrorstr
startswithendswith)r   sermsg r   Alib/python3.10/site-packages/pandas/tests/strings/test_strings.py)test_startswith_endswith_non_str_patterns   s   "r   c                  C   sJ   t ddg} tjtdd t| j W d    d S 1 sw   Y  d S )Nr   r   z&'StringMethods' object is not iterabler   )r
   r   r   r   iterr   )r   r   r   r   test_iter_raises$   s   "r!   c                 C   sZ   t ddtjdg| d}|jd}t| rtjnd}t ddtjd	g|d}t|| d S )
Nr   ZfoofooZfoooofooofommmfoodtypezf[o]+Int64         )	r
   npnanr   countr   float64tmassert_series_equalany_string_dtyper   resultexpected_dtypeexpectedr   r   r   
test_count.   s   r3   c               
   C   sf   t dtjddt dd ddg	td} | jd}t dtjdtjtjdtjtjtjg	}t	|| d S )	NabTr   r%          @r"   r   )
r
   r(   r)   r   todayobjectr   r*   r,   r-   r   r0   r2   r   r   r   test_count_mixed_object8   s   &r:   c                 C   s   t ddtjdtjdg| d}|jd}t ddtjd	tjd
g| d}t|| |jg d}t ddtjdtjdg| d}t|| d S )Nr4   r5   cdr"      aaabbbZcccZddd)r%   r&   r=   r'         bbZccccZdddddd)r
   r(   r)   r   repeatr,   r-   r/   r   r0   r2   r   r   r   test_repeatB   s   rE   c               
   C   sd   t dtjddt dd ddg	} | jd}t dtjd	tjtjd
d tjtjg	td}t	|| d S )Nr4   r5   Tr   r%   r6   r=   r>   r?   Z	foofoofoor"   )
r
   r(   r)   r   r7   r   rC   r8   r,   r-   r9   r   r   r   test_repeat_mixed_objectR   s    rF   zarg, repeatr'   r5   c                 C   s@   t d|g| d}|jd|g}t dd g| d}t|| d S )Nr4   r"   r=   r>   )r
   r   rC   r,   r-   )r/   argrC   r   r0   r2   r   r   r   test_repeat_with_null\   s   rH   c           
      C   s  t | d }}t dd}t| rt dd}t td}n
t dd}t dd}t td}t td}t }t||j| d|j ksEJ t||j	  t||j
d t||jd t||jd t||jd t||j  t||j  t||jdd t||jd	 t||jd
 ttdg| d|jjddd ttddg| d|jjddd t||jjddd ttddg| d|jjddd t|jg dd|j  t||jd t||j  t||jd t||jd t||jd t||jd t||jd t||jd t||jd t||jj ddd t||j d t||jj!ddd t||j!d t||jj"dd t||jj"dd t||j#  t||j$  t||j%  t||j&d t||j'd t||j(d t||j)d t||j*  t||j+  t||j,  t||j-  t||j.  t||j/  t||j0  t||j1  t||j2  t||j3  t||j4  t||j5d t6dd}	t||j7|	 d S )Nr"   r   int64r$   boolean r4   r5   r=   z^ar   )columnsr#   z()T)expandr%   z()()FZaxis*   stop)stepasciiNFC)8r
   r   boolr8   r   r,   r-   r   cattitler*   containsr   r   lowerupperreplacerC   r   Zassert_frame_equalextractZset_axisZget_dummiesjoinlenfindallfindrfindZpadcentersplitrsplit	partition
rpartitionslicestriplstriprstripwrapgetdecodeencodeisalnumisalphaisdigitisspaceislowerisupperistitle	isnumeric	isdecimal
capitalizeswapcase	normalize	maketrans	translate)
r/   Z	empty_stremptyZempty_inferred_strZ	empty_intZ
empty_boolZempty_objectZempty_bytesZempty_dftabler   r   r   test_empty_str_methodse   s   





r   zmethod, expectedro   )
TTTTTFTTFFrp   )
TTTFFFTFFFrq   )
FFFTFFFTFFrv   rr   )
FFFFFFFFFTrs   )
FTFFFFFFFFrt   )
TFFFTFTFFFru   )
TFTFTFFFFFc                    s   t g d|d}t|rdnd}t ||d}t|j  }t||  fdd|D }t||ks4J tj|j	g d< t|j  }|j
dkrW|t}tj|j	g d< d S |j
d	kred
|j	g d< d S tj|j	g d< d S )N)
Ar5   ZXy4Z3ArK   ZTTZ55-z  r"   rU   rJ   c                       g | ]}t |  qS r   getattr.0itemmethodr   r   
<listcomp>       z"test_ismethods.<locals>.<listcomp>)r%   r&   r=   r'   r8   r   F)r
   r   r   r   r,   r-   listr(   r)   ilocr#   astyper8   )r   r2   r/   r   r1   r0   Zexpected_stdlibr   r   r   test_ismethods   s$    


r   )FTTTFTTFrw   )FTFFFFTF)FTTFFFTFc                    s   t g d|d}t|rdnd}t ||d} dkr2t|jtr2|jjdkr2ts2d|jd< d|jd	< t|j	  }t
|| |d
ksMt|tr`|jdkrb fdd|D }t||ks^J d S d S d S )N)r   3   ³   ¼   ★u   ፸   ３fourr"   rU   rJ   rq   ZpyarrowTr=   r@   r8   Zpythonc                    r   r   r   r   r   r   r   r     r   z*test_isnumeric_unicode.<locals>.<listcomp>)r
   r   
isinstancer#   r   Zstorager   r   r   r   r,   r-   r   )r   r2   r/   r   r1   r0   r   r   r   test_isnumeric_unicode   s2   



r   z4ignore:Downcasting object dtype arrays:FutureWarningFc                 C   s~   dt jddt jddg}t||d}|dkr"t|tddt}nt|r(d	nd
}t||d}t|j	|  }t
|| d S )Nr   r   r   r   r   r"   r   Fr8   rJ   )r(   r)   r
   r8   Zfillnar   rU   r   r   r   r,   r-   )r   r2   r/   valuesr   r1   r0   r   r   r   test_isnumeric_unicode_missing   s   	r   c                 C   sD   t ddtjdg| d}|jdjd}|t}t	|| d S )NZa_b_cZc_d_eZf_g_hr"   _)
r
   r(   r)   r   rc   r]   r   r8   r,   r-   rD   r   r   r   test_spilt_join_roundtrip7  s   
r   c               
   C   sl   t dtjddt dd ddg	} | jdjd}t dtjdtjtjdd tjtjg	td}t	
|| d S )	Na_basdf_cas_asdfTr   r%   r6   r   r"   )r
   r(   r)   r   r7   r   rc   r]   r8   r,   r-   r9   r   r   r   &test_spilt_join_roundtrip_mixed_object>  s   r   c                 C   sb   t dddtjdddg| d}|j }t| rdnd	}t d
ddtjdddg|d}t|| d S )Nr   ZfoooZfoooooZfooooooozfoo
u   あr"   r+   r$   r=   r'   rA      r%   )r
   r(   r)   r   r^   r   r,   r-   r.   r   r   r   test_lenJ  s   
r   c               
   C   s`   t dtjddt dd ddg	} | j }t dtjdtjtjdtjtjtjg	}t|| d S )	Nr   r   Tr   r%   r6   r=      )	r
   r(   r)   r   r7   r   r^   r,   r-   r9   r   r   r   test_len_mixedW  s   
&r   zmethod,sub,start,end,expectedindexZEF)r'   r=   r%   r   rindex)r'   r@      r'   r=   )r'   r=   r   r'   Er   r@   )r'   r=   r%   r'   c           
         s   |g d|d}t |rtjnd}|||d}t|j }	|tu r,t|	| nt|	|  fdd|D }t	|	|ksFJ d S )NZABCDEFGZBCDEFEFZ	DEFGHIJEFZEFGHEFr"   r$   c                    s   g | ]}t | qS r   r   r   endr   startsubr   r   r   |  s    ztest_index.<locals>.<listcomp>)
r   r(   rI   r   r   r
   r,   r-   assert_index_equalr   )
r   r   r   r   index_or_seriesr/   r2   objr1   r0   r   r   r   
test_index`  s   r   c                 C   sP   | g d|d}t jtdd |jd W d    d S 1 s!w   Y  d S )Nr   r"   zsubstring not foundr   ZDE)r   r   
ValueErrorr   r   )r   r/   r   r   r   r   test_index_not_found_raises  s   "r   r   c                 C   sT   | g |d}d}t jt|d t|j|d W d    d S 1 s#w   Y  d S )Nr"   z!expected a string object, not intr   r   )r   r   r   r   r   )r   r/   r   r   r   r   r   r   test_index_wrong_type_raises  s
   "r   zmethod, exp)r%   r%   r   )r=   r%   r&   c                 C   s\   t dddtjg| d}t| rtjnd}t|j|d}t |tjg |d}t|| d S )NZabcbabZbcber"   r$   r5   )	r
   r(   r)   r   r+   r   r   r,   r-   )r/   r   expr   r1   r0   r2   r   r   r   test_index_missing  s   r   c                 C   sh   t dg| d}|jd}t g dgtd}t|| |jjdddd}t dg| d}t|| d S )	NzA|B|Cr"   |)r   BC F)ZregexzA B C)r
   r   rc   r8   r,   r-   r[   rD   r   r   r   test_pipe_failures  s   r   zstart, stop, step, expectedr&   ZbazrK   ZowtoofaaZowtrabaaZxuqzabaaZowtooZowtraZxuqza
   ZotoZatoZaqxZofaZabac                 C   sB   t ddtjdg|d}|j| ||}t ||d}t|| d S )NaafootwoaabartwoZaabazquxr"   )r
   r(   r)   r   rg   r,   r-   )r   rQ   rR   r2   r/   r   r0   r   r   r   
test_slice  s   r   r%   ZoofZrabc              	   C   sJ   t dtjddt d ddg}|j| ||}t |td}t	|| d S )Nr   r   Tr%   r6   r"   )
r
   r(   r)   r   r7   r   rg   r8   r,   r-   )r   rQ   rR   r2   r   r0   r   r   r   test_slice_mixed_object  s   r   zstart,stop,repl,expectedZshrtza it longerZevnlongerthanthatzZshzrtza zit longerZevznlongerthanthatZshzortza zbit longerZevzenlongerthanthatZshorzza bit longezZevenlongerthanthazZzrtZzerZzatrA   Zshortzza bit zngerZevenlozerthanthatiZevenlongzerthanthatc                 C   sD   t ddddtjg|d}t ||d}|j| ||}t|| d S )NZshortza bit longerZevenlongerthanthatrK   r"   )r
   r(   r)   r   Zslice_replacer,   r-   )r   rQ   replr2   r/   r   r0   r   r   r   test_slice_replace  s   r   rh   ZaarB   Zccri   zaa   zbb 
cc  rj   z  aaz bbc                 C   s@   t ddtjdg| d}t|j| }t || d}t|| d S )Nz  aa   z bb 
r   r"   )r
   r(   r)   r   r   r,   r-   r/   r   r   r   r0   r2   r   r   r   test_strip_lstrip_rstrip  s   	r   zaa  zbb 	
c              	   C   s^   t dtjddt d ddg}t|j|  }t |tjtjd tjtjg td}t	|| d S )Nz  aa  z bb 	
Tr%   r6   r"   )
r
   r(   r)   r   r7   r   r   r8   r,   r-   )r   r   r   r0   r2   r   r   r   %test_strip_lstrip_rstrip_mixed_object  s   	"r   )ABC BNSDLDFJH )ZABCxxr   LDFJH xx)ZxxABCxx BNSDr   c                 C   s<   t g d| d}t|j|d}t || d}t|| d S )N)ZxxABCxxr   r   r"   x)r
   r   r   r,   r-   r   r   r   r   test_strip_lstrip_rstrip_args  s   	r   zprefix, expectedr4   )r5   z b cbcr   )rK   a b cr   c                 C   8   t g d| d}|j|}t || d}t|| d S N)r   r   r   r"   )r
   r   removeprefixr,   r-   )r/   prefixr2   r   r0   ser_expectedr   r   r   test_removeprefix     r   zsuffix, expectedr;   )r   za b r5   r   )r   r   rK   c                 C   r   r   )r
   r   removesuffixr,   r-   )r/   suffixr2   r   r0   r   r   r   r   test_removesuffix!  r   r   c              
   C   s   t dddddtjdddg	| d	}|jd
 }|jd
}t|| |jd d }|jjdd}t|| |jdd d }|jjddd}t|| d S )NZYYYr   r   Z
YYYYYYbYYYZBYYYcYYYZCYYYBYYYZdogZcYYYtr"   r   r=   rP   r&   r   )r   rR   )r
   r(   r)   r   rl   r,   r-   rg   rD   r   r   r   test_string_slice_get_syntax+  s   
r   c                  C   s6   t g d} | jd }t dtjdg}t|| d S )N))r%   r&   )r%   )r=   r'   r@   r%   r&   r'   r
   r   r(   r)   r,   r-   r9   r   r   r   &test_string_slice_out_of_bounds_nested>  s   
r   c                 C   s>   t g d| d}|jd }t dtjdg| d}t|| d S )N)r   r5   Zbar"   r%   or4   r   rD   r   r   r   test_string_slice_out_of_boundsE  s   
r   c                 C   sD   t g d| djd}|jd}t g ddd}t|| d S )N)r4   r5   u   aär"   utf-8r   )r
   r   rn   rm   r,   r-   rD   r   r   r   test_encode_decodeL  s   r   c                 C   sz   t g d| d}d}tjt|d |jd W d    n1 s"w   Y  |jdd}|dd }t|| d S )	N)r4   r5   u   ar"   z['charmap' codec can't encode character '\\x9d' in position 1: character maps to <undefined>r   cp1252ignorec                 S      |  ddS Nr   r   )rn   r   r   r   r   <lambda>^      z*test_encode_errors_kwarg.<locals>.<lambda>)	r
   r   r   UnicodeEncodeErrorr   rn   mapr,   r-   )r/   r   r   r0   r2   r   r   r   test_encode_errors_kwargS  s   r   c                  C   s|   t g d} d}tjt|d | jd W d    n1 s w   Y  | jdd}| dd d}t	|| d S )	N)   a   bs   azS'charmap' codec can't decode byte 0x9d in position 1: character maps to <undefined>r   r   r   c                 S   r   r   )rm   r   r   r   r   r   m  r   z*test_decode_errors_kwarg.<locals>.<lambda>r   )
r
   r   r   UnicodeDecodeErrorr   rm   r   r   r,   r-   )r   r   r0   r2   r   r   r   test_decode_errors_kwargb  s   r   c                 C   <   t ddg}|jjd| d}t ddg| d}t|| d S )Nr   r   r   r"   r4   r5   r
   r   rm   r,   r-   )Zstring_dtyper   r0   r2   r   r   r   test_decode_string_dtypeq     r   c                 C   r   )Nr   s   \ud800r   r"   r4   z\ud800r   )Zobject_dtyper   r0   r2   r   r   r   test_decode_object_dtypey  r   r   c                  C   sT   t ddg} d}tjt|d | jjddd W d    d S 1 s#w   Y  d S )Nr   r   z1dtype must be string or object, got dtype='int64'r   r   rI   r"   )r
   r   r   r   r   rm   )r   r   r   r   r   test_decode_bad_dtype  s
   "r   zform, expectedNFKCr   123	   アイエrT   	   ＡＢＣ	   １２３	   ｱｲｴc                 C   sL   t dddtjdgg d|d}t |g d|d}|j| }t|| d S )Nr   r   r   r   r4   r5   r;   r<   er   r#   )r
   r(   r)   r   rz   r,   r-   )Zformr2   r/   r   r0   r   r   r   test_normalize  s   r  c                 C   s^   t dddtjdgg d| d}tjtdd |jd	 W d    d S 1 s(w   Y  d S )
Nr   r   r   r   r   r   zinvalid normalization formr   Zxxx)r
   r(   r)   r   r   r   r   rz   r/   r   r   r   r   test_normalize_bad_arg_raises  s   "r  c                  C   s4   t g d} t g d}| jd}t|| d S )N)r   r   r   )r   r   r   r   )r   r   rz   r,   r   )idxr2   r0   r   r   r   test_normalize_index  s   r  zvalues,inferred_typestring)r4   r5   r%   zmixed-integer)r4   r5   ?mixed)r4   r5   r  r%   i  c                 C   s2   || }|t u r|j|ksJ t|jtsJ d S )N)r   inferred_typer   r   r   )r   r	  r   r   r   r   r   "test_index_str_accessor_visibility  s   r
  ZfloatingZ
datetime64Ztimedelta64c                 C   s\   || }|t u r|j|ksJ d}tjt|d |j W d    d S 1 s'w   Y  d S )Nz-Can only use .str accessor with string valuesr   )r   r	  r   r   AttributeErrorr   )r   r	  r   r   r   r   r   r   0test_index_str_accessor_non_string_values_raises  s   "r  c                  C   sZ   t ddg} | jdksJ d}tjt|d | j W d    d S 1 s&w   Y  d S )N)r4   r5   r  z5Can only use .str accessor with Index, not MultiIndexr   )r	   Zfrom_tuplesr	  r   r   r  r   )r  r   r   r   r   )test_index_str_accessor_multiindex_raises  s   "r  c                 C   sL   t td| d}tjtdd d|j_W d    d S 1 sw   Y  d S )NZaabbcder"   z You cannot add any new attributer   r4   )r
   r   r   r   r  r   Zxlabelr  r   r   r   #test_str_accessor_no_new_attributes  s   
"r  c                  C   sx   t ttddt} t ttddt}d}tjt|d | j	
| W d    d S 1 s5w   Y  d S )NabcZS1defz9Cannot use .str.cat with values of inferred dtype 'bytes'r   )r
   r(   Zarrayr   r   r8   r   r   r   r   rV   )ZlhsZrhsr   r   r   r   test_cat_on_bytes_raises  s   "r  c                  C   s<   t tdd} tg d}| jdd dd}t|| d S )Nr  r  )zA/DzB/EzC/Fc                 S   s   d | j S )N/)r]   r   rZ   )fr   r   r   r     s    z1test_str_accessor_in_apply_func.<locals>.<lambda>r%   rN   )r   zipr
   Zapplyr,   r-   )Zdfr2   r0   r   r   r   test_str_accessor_in_apply_func  s   r  c                  C   sr   t ddddtjg} t dddtjtjgtd}t| jd| t d	d
g} t ddg}t| jd| d S )N-11Z1000r   z-01Z001r"   r=   -2+5z-0002z+0005r@   )r
   r(   r)   r8   r,   r-   r   zfillvaluer2   r   r   r   
test_zfill  s   r  c                  C   s`   t ddg} d}dt|j }tjt|d | j| W d    d S 1 s)w   Y  d S )Nr  r  r4   z#width must be of integer type, not r   )r
   r   r   r   r   r   r   r  )r  Zwidr   r   r   r   $test_zfill_with_non_integer_argument  s   "r  c                  C   s0   t g d} t g d}t| jd| d S )N)z-catr  z+dog)z-0catz-0001z+0dogr@   )r
   r,   r-   r   r  r  r   r   r   test_zfill_with_leading_sign  s   r  c                  C   sr   t ddddddddig} | jd}t g d	td
}t|| | jd}t g dtd
}t|| d S )NHelloWorld)namer  GoodbyePlanetr  Sear"  )r   r#  Nr"   )r!  r$  r%  )r
   r   rl   r8   r,   r-   )sr0   r2   r   r   r   test_get_with_dict_label  s   r'  c                  C   s8   t ddgjjddd} t ddgdd	}t| | d S )
N   x   yzUTF-8strict)encodingerrorsr   yr   r"   r   )r0   r2   r   r   r   test_series_str_decode  s   r.  c                 C   s(  | }|t krd nt}tg d}tg d|d}d}tj||d ||B }W d    n1 s0w   Y  ||tB }t|| tj||d ||@ }W d    n1 sXw   Y  ||t@ }t|| tj||d ||A }W d    n1 sw   Y  ||tA }t|| d S )N)TFFT)rK   rK   r5   r;   r"   z$operations between boolean dtype andr   )r8   DeprecationWarningr
   r,   Zassert_produces_warningr   rU   r-   )r/   r#   warnleftrightr   r0   r2   r   r   r   test_reversed_logical_ops$  s(   


r3  c                 C   s   | t krtjjdd}|| td}tddtg| d}|| }t|d |d |jj	gt d}t
|| || }td| d| |jj	gt d}t
|| d S )Nzqwith NA present we go through _masked_arith_op which raises TypeError bc Path is not recognized by lib.is_scalar.)reasonz/Users/Irv/r   r   r"   )r8   r   markZxfailZapplymarkerr   r
   r   r#   Zna_valuer,   r-   )r/   Zrequestr5  r   r   r0   r2   r   r   r   test_pathlib_path_division<  s   
r6  )Pr   r   Zpathlibr   Znumpyr(   r   Zpandas.compatr   Zpandasr   r   r   r	   r
   r   Zpandas._testingZ_testingr,   Zpandas.core.strings.accessorr   Zpandas.tests.stringsr   r5  Zparametrizer   r!   r3   r:   rE   rF   rH   r   r   r   filterwarningsr)   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r
  r  r  r  r  r  r  r  r  r'  r.  r3  r6  r   r   r   r   <module>   s    






R







%	





$$




















