� ���gmO���UddlZddlZddlZddlZddlZddlZddlmZddlm Z ddl m Z m Z m Z mZmZddlZddlZddlmZddlmZddlmZdd lmZe rdd lmZee��Zd ada ed e!d <da"eej#e!d<Gd�d ��Z$d�Z%d�Z&d�Z'd�Z(de)fd�Z*de+fd�Z,Gd�d��Z-ej.��Z/ia0e1e+e)fe!d<ddde+fd�Z2d0de3de+fd�Z4d �Z5d0d!e+fd"�Z6d1d#e d$ee+de+fd%�Z7 d2d#e d'e8d(e1e+e fd)ee9e+d*ee9e+d+e)de1e+e ffd,�Z: d3d-e)d)ee9e+d*ee9e+d.ee9e+d+e)d$ee+f d/�Z;dS)4�N)�wraps)�Path)� TYPE_CHECKING�Any�Callable�Optional�Union�)�config)�"INVALID_WINDOWS_CHARACTERS_IN_PATH)�dumps)� get_logger)�DatasetT� _TempCacheDir�_TEMP_DIR_FOR_TEMP_CACHE_FILES� _DATASETS_WITH_TABLE_IN_TEMP_DIRc�$�eZdZdZd�Zd�Zd�ZdS)rz� A temporary directory for storing cached Arrow files with a cleanup that frees references to the Arrow files before deleting the directory itself to avoid permission errors on Windows. c��tjtj���|_t j||j��|_dS)N)�prefix) �tempfile�mkdtempr �TEMP_CACHE_DIR_PREFIX�name�weakref�finalize�_cleanup� _finalizer��selfs �d/home/asafur/pinokio/api/open-webui.git/app/env/lib/python3.11/site-packages/datasets/fingerprint.py�__init__z_TempCacheDir.__init__3s4���$�F�,H�I�I�I�� �!�*�4���?�?�����c�&�t��D]}|����tj�|j��rG t j|j��dS#t$r}td|j�d���|�d}~wwxYwdS)NzBAn error occured while trying to delete temporary cache directory z. Please delete it manually.) �(get_datasets_with_cache_file_in_temp_dir�__del__�os�path�existsr�shutil�rmtree� Exception�OSError)r�dset�es r rz_TempCacheDir._cleanup7s���<�>�>� � �D� �L�L�N�N�N�N� �7�>�>�$�)� $� $� � �� �d�i�(�(�(�(�(��� � � ��A�Y]�Yb�A�A�A��������� ���� � s� A&�& B�0B � Bc�d�|j���r|���dSdS�N)r�detachrrs r �cleanupz_TempCacheDir.cleanupBs3�� �?� !� !� #� #� � �M�M�O�O�O�O�O� � r"N)�__name__� __module__� __qualname__�__doc__r!rr2�r"r rr-sN�������� @�@�@� � � �����r"c���t�dSt�tj��at d�|jD����rt�|��dSdS)a This function registers the datasets that have cache files in _TEMP_DIR_FOR_TEMP_CACHE_FILES in order to properly delete them before deleting the temporary directory. The temporary directory _TEMP_DIR_FOR_TEMP_CACHE_FILES is used when caching is disabled. Nc3�|K�|]7}ttj��t|d��jvV��8dS)�filenameN)rrr�parents)�.0� cache_files r � <genexpr>z?maybe_register_dataset_for_temp_dir_deletion.<locals>.<genexpr>SsW���� � � � � +� 0�1�1�T�*�Z�:P�5Q�5Q�5Y�Y� � � � � � r")rrr�WeakSet�any� cache_files�add)�datasets r �,maybe_register_dataset_for_temp_dir_deletionrDGs|�� &�-���(�/�+2�?�+<�+<�(� � � �!�-� � � ���6� )�,�,�W�5�5�5�5�5� 6�6r"c�<�t�tt��ngSr0)r�listr7r"r r$r$Zs��5U�5a�4�0� 1� 1� 1�gi�ir"c� �dadS)�� When applying transforms on a dataset, the data are stored in cache files. The caching mechanism allows to reload an existing cache file if it's already been computed. Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated after each transform. If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets. More precisely, if the caching is disabled: - cache files are always recreated - cache files are written to a temporary directory that is deleted when session closes - cache files are named using a random hash instead of the dataset fingerprint - use [`~datasets.Dataset.save_to_disk`] to save a transformed dataset or it will be deleted when session closes - caching doesn't affect [`~datasets.load_dataset`]. If you want to regenerate a dataset from scratch you should use the `download_mode` parameter in [`~datasets.load_dataset`]. TN��_CACHING_ENABLEDr7r"r �enable_cachingrK^s��$���r"c� �dadS)rHFNrIr7r"r �disable_cachingrMss��$���r"�returnc�*�tt��S)a� When applying transforms on a dataset, the data are stored in cache files. The caching mechanism allows to reload an existing cache file if it's already been computed. Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated after each transform. If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets. More precisely, if the caching is disabled: - cache files are always recreated - cache files are written to a temporary directory that is deleted when session closes - cache files are named using a random hash instead of the dataset fingerprint - use [`~datasets.Dataset.save_to_disk`]] to save a transformed dataset or it will be deleted when session closes - caching doesn't affect [`~datasets.load_dataset`]. If you want to regenerate a dataset from scratch you should use the `download_mode` parameter in [`~datasets.load_dataset`]. )�boolrJr7r"r �is_caching_enabledrQ�s��$ � � !� !�!r"c�D�t�t��atjS)z7Return a directory that is deleted when session closes.)rrrr7r"r �#get_temporary_cache_files_directoryrS�s��&�-�)6���&� )� .�.r"c��eZdZUdZiZeed<d�Zede e e e fde fd���Z edede fd���Zdeddfd �Zde fd �ZdS) �Hasherz-Hasher that accepts python objects as inputs.�dispatchc�6�tj��|_dSr0)�xxhash�xxh64�mrs r r!zHasher.__init__�s���������r"�valuerNc��t|t��r|gn|}tj��}|D]}|�|���|���Sr0)� isinstance�bytesrXrY�update� hexdigest)�clsr[rZ�xs r � hash_byteszHasher.hash_bytes�sU��%�e�U�3�3�>������ �L�N�N��� � �A� �H�H�Q�K�K�K�K��{�{�}�}�r"c�F�|�t|����Sr0)rcr )rar[s r �hashz Hasher.hash�s���~�~�e�E�l�l�+�+�+r"Nc� �dt|���d�}|�|��}|j�|�d����|j�|�d����dS)Nz==�utf8zutf-8)�typererZr_�encode)rr[�header_for_update�value_for_updates r r_z Hasher.update�sw��0��e���0�0�0���9�9�U�+�+�� �� � �'�.�.�v�6�6�7�7�7� �� � �&�-�-�g�6�6�7�7�7�7�7r"c�4�|j���Sr0)rZr`rs r r`zHasher.hexdigest�s���v���!�!�!r")r3r4r5r6rV�dict�__annotations__r!� classmethodr r^rF�strrcrrer_r`r7r"r rUrU�s��������7�7��H�d���� � � ���u�U�D��K�%7�8��S�����[���,��,��,�,�,��[�,�8�C�8�D�8�8�8�8� "�3�"�"�"�"�"�"r"rU�fingerprint_warningsrCrc�j�|j}t��}t|��D]9}|dkr� |�|��|�||���:|jD]:}|�t j�|d�����;|���S)N� _fingerprintr:) �__dict__rU�sortedr_rAr&r'�getmtimer`)rC�state�hasher�keyr=s r �generate_fingerprintrz�s��� � �E� �X�X�F��e�}�}�"�"�� �.� � � �� � �c����� � �e�C�j�!�!�!�!��)�@�@� �� � �b�g�&�&�z�*�'=�>�>�?�?�?�?� � � � � �r"�@�nbitsc�H�t�|��d|dz�d��S)N�0�rb)�fingerprint_rng� getrandbits)r|s r �generate_random_fingerprintr��s+���)�)�%�0�0� A�5�A�:� A� A� A� A�Ar"c ���t��}|�|�� |�|��n�#trct�dd��s)t �d|�d���dtd<n=t �d|�d���nt �d|�d���t��cYSxYwt|��D]�}|�|�� |�||���4#tr{t�dd��s5t �d|�d ||�d |�d���dtd<nUt �d|�d ||�d |�d���n*t �d|�d ||�d |�d���t��cYcSxYw|� ��S) N�(update_fingerprint_transform_hash_failedFz Transform a� couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.Tz= couldn't be hashed properly, a random hash was used instead.zn couldn't be hashed properly, a random hash was used instead. This doesn't affect caching since it's disabled.z Parameter 'z'=z of the transform ) rUr_rJrq�get�logger�warning�infor�rur`)� fingerprint� transform�transform_argsrxrys r �update_fingerprintr��s��� �X�X�F� �M�M�+����-�� � �i� � � � ��-� � �'�+�+�,V�X]�^�^� s����e��e�e�e���� TX�$�%O�P�P�� � �q��q�q�q�r�r�r�r� �K�K�W�Y�W�W�W� � � �+�,�,�,�,�,�����n�%�%�1�1��� � �c���� 1� �M�M�.��-� .� .� .� .�� 1�� �+�/�/�0Z�\a�b�b� ��N�N�i�c�i�i�^�C�-@�i�i�T]�i�i�i���� X\�(�)S�T�T��K�K�]�c�]�]�^�C�-@�]�]�T]�]�]�]������ � �J�#�J�J���)<�J�J�PY�J�J�J����/�0�0� 0� 0� 0� 0� 0���� � � � � �s�;�BC�?D�B<Gr�c ��t|t��r|std|�d����tD]!}||vrtdt�d|�d�����"t |��|kr&td|�d|�dt |���d ����d S) z� Make sure the fingerprint is a non-empty string that is not longer that max_length=64 by default, so that the fingerprint can be used to name cache files without issues. zInvalid fingerprint 'z#': it should be a non-empty string.z5Invalid fingerprint. Bad characters from black list 'z ' found in 'z6'. They could create issues when creating cache files.z&Invalid fingerprint. Maximum lenth is z but 'z ' has length z2.It could create issues when creating cache files.N)r]rp� ValueErrorr �len)r�� max_length� invalid_chars r �validate_fingerprintr�s�� �k�3� '� '�c�{�c��a��a�a�a�b�b�b�:��� � �;� &� &��G�Hj�G�G�yD�G�G�G��� � '�  �;���*�$�$�� @�Z� @� @�{� @� @�ad�ep�aq�aq� @� @� @� � � �%�$r"�func�versionc�<�|j�d|j��}|�|d|��z }|S)zW Format a transform to the format that will be used to update the fingerprint. �.N�@)r4r5)r�r�r�s r � format_transform_for_fingerprintr�)s8���?�8�8�T�%6�8�8�I����]��]�]�"� � �r"F�args�kwargs� use_kwargs� ignore_kwargs�randomized_functionc�����|���}|rmd�tj|��j���D��}|dd�}|dd�}|�t ||����n4|tttj|��j����=�r �fd�|� ��D��}�r �fd�|� ��D��}|r�|� d���o|� d���Ztj � ��^}} } }| dkr| | n| d } tj �| ��|d<d �tj|��j���D��} | � ��D]*\} } | |vr!|| | kr|�| ���+|S) ze Format the kwargs of a transform to the format that will be used to update the fingerprint. c�2�g|]}||jk� |j��Sr7)� VAR_KEYWORDr�r<�ps r � <listcomp>z1format_kwargs_for_fingerprint.<locals>.<listcomp>@s)��d�d�d�Q�QR�VW�Vc�Qc�Qc�!�&�Qc�Qc�Qcr"r Nc�$��i|] \}}|�v� ||�� Sr7r7)r<�k�vr�s �r � <dictcomp>z1format_kwargs_for_fingerprint.<locals>.<dictcomp>Ls+���!e�!e�!e�4�1�a�UV�Zd�Ud�Ud�!�Q�Ud�Ud�Udr"c�$��i|] \}}|�v� ||�� Sr7r7)r<r�r�r�s �r r�z1format_kwargs_for_fingerprint.<locals>.<dictcomp>Ns+���!l�!l�!l�4�1�a�UV�^k�Uk�Uk�!�Q�Uk�Uk�Ukr"�seed� generatoriprc�R�i|]$}|jtjk�|j|j��%Sr7)�default�inspect�_emptyrr�s r r�z1format_kwargs_for_fingerprint.<locals>.<dictcomp>Ws7������RS�R[�_f�_m�Rm�Rm���� �Rm�Rm�Rmr")�copyr�� signature� parameters�valuesr_�zip�next�iter�itemsr��np�random� get_state� default_rng�pop)r�r�r�r�r�r��kwargs_for_fingerprint�params�_r��pos�default_values�default_varname� default_values `` r �format_kwargs_for_fingerprintr�3s$����$�[�[�]�]�� � �d�d�'�"3�D�"9�"9�"D�"K�"K�"M�"M�d�d�d���A�B�B�x���������%�%�c�&�$�&7�&7�8�8�8�8� "� ��g�'��-�-�8�9�9� :� :� � �f�!e�!e�!e�!e�3I�3O�3O�3Q�3Q�!e�!e�!e���m�!l�!l�!l�!l�3I�3O�3O�3Q�3Q�!l�!l�!l���N� !� %� %�f� -� -� 5�:P�:T�:T�U`�:a�:a�:i�!�y�2�2�4�4� �A�t�S�1� #�c� � �4��9�9�t�A�w�D�24�)�2G�2G��2M�2M� "�;� /���#*�#4�T�#:�#:�#E�#L�#L�#N�#N����N�+9�*>�*>�*@�*@�8�8�&��� �4� 4� 4�9O�P_�9`�dq�9q�9q� "� &� &�� 7� 7� 7�� !�!r"�inplace�fingerprint_namesc�2���������4t�t��stdt����������4t�t��stdt���������r�rtd������ndg�������fd�}|S)a� Wrapper for dataset transforms to update the dataset fingerprint using ``update_fingerprint`` Args: inplace (:obj:`bool`): If inplace is True, the fingerprint of the dataset is updated inplace. Otherwise, a parameter "new_fingerprint" is passed to the wrapped method that should take care of setting the fingerprint of the returned Dataset. use_kwargs (:obj:`List[str]`, optional): optional white list of argument names to take into account to update the fingerprint to the wrapped method that should take care of setting the fingerprint of the returned Dataset. By default all the arguments are used. ignore_kwargs (:obj:`List[str]`, optional): optional black list of argument names to take into account to update the fingerprint. Note that ignore_kwargs prevails on use_kwargs. fingerprint_names (:obj:`List[str]`, optional, defaults to ["new_fingerprint"]): If the dataset transforms is not inplace and returns a DatasetDict, then it can require several fingerprints (one per dataset in the DatasetDict). By specifying fingerprint_names, one fingerprint named after each element of fingerprint_names is going to be passed. randomized_function (:obj:`bool`, defaults to False): If the dataset transform is random and has optional parameters "seed" and "generator", then you can set randomized_function to True. This way, even if users set "seed" and "generator" to None, then the fingerprint is going to be randomly generated depending on numpy's current state. In this case, the generator is set to np.random.default_rng(np.random.get_state()[1][0]). version (:obj:`str`, optional): version of the transform. The version is taken into account when computing the fingerprint. If a datase transform changes (or at least if the output data that are cached changes), then one should increase the version. If the version stays the same, then old cached data could be reused that are not compatible with the new transform. It should be in the format "MAJOR.MINOR.PATCH". Nz)use_kwargs is supposed to be a list, not z,ignore_kwargs is supposed to be a list, not z5fingerprint_names are only used when inplace is False�new_fingerprintc�j�����s1t�fd��D����std��d��d�����rBd�jjvrtd��d����d�jjvrtd ��d����t ��� ���t ����������fd ���}d |_|S) Nc3�4�K�|]}|�jjvV��dSr0)�__code__� co_varnames)r<rr�s �r r>z>fingerprint_transform.<locals>._fingerprint.<locals>.<genexpr>�s-�����"c�"c��4�4�=�+D�#D�"c�"c�"c�"c�"c�"cr"z function z is missing parameters z in signaturer�z'seed' must be in z 's signaturer�z'generator' must be in )r�c ����t�||� � � ���}|r|d}|dd�}nF|�ttt j���j������}� rt|j� |��}nN�D]K}|� |���||d<t|j� |��||<�6t||���L�|g|�Ri|��}� r||_|S)N)r�r�r�rr �fingerprint_name) r�r�r�r�r�r�r�r�rsr�r�)r�r�r�rCr�r��outr�r�r�r�r�r�r�s �������r �wrapperz<fingerprint_transform.<locals>._fingerprint.<locals>.wrapper�sC���%B����%�+�$7� &�&�&� "�� ^�#'��7���A�B�B�x���#)�:�:�d�4��8I�$�8O�8O�8Z�3[�3[�.\�.\�#]�#]��� G�"4�W�5I�9�Vl�"m�"m���(9�G�G�$��z�z�"2�3�3�;�EU�.�/A�B�3E�#�0�)�=S�4�4��/�0�0�-�V�4D�-E�F�F�F�F��$�w�0��0�0�0��0�0�C�� 7�'6��$��Jr"r�)�allr�r�r�r�r�_decorator_name_) r�r�r�r�r�r�r�r�r�s ` @������r rsz+fingerprint_transform.<locals>._fingerprint�s������ h�s�"c�"c�"c�"c�Qb�"c�"c�"c�c�c� h��f��f�f�FW�f�f�f�g�g� g� � O��T�]�6�6�6� �!H�d�!H�!H�!H�I�I�I��$�-�";�;�;� �!M�4�!M�!M�!M�N�N�N�4�T�7�K�K�K� � �t���& �& �& �& �& �& �& �& �& �& � ��& �P$1�� ��r")r]rFr�rh)r�r�r�r�r�r�rss`````` r �fingerprint_transformr�`s���������F��j��T�&B�&B���W�T�*�EU�EU�W�W�X�X�X�� ��M�4�)H�)H� ��Z��Z�HX�HX�Z�Z�[�[�[��R�$�R��P�Q�Q�Q�->�-J�)�)�Qb�Pc��6�6�6�6�6�6�6�6�6�6�p �r")r{r0)NNF)NNNFN)<r�r&r�r)rr� functoolsr�pathlibr�typingrrrrr �numpyr�rX�r �namingr � utils._dillr � utils.loggingr� arrow_datasetrr3r�rJrrnrr?rrDr$rKrMrPrQrprSrU�Randomr�rqrmrz�intr�r�r�r��tuplerFr�r�r7r"r �<module>r�s ������� � � � � � � � � � � � ���������������������@�@�@�@�@�@�@�@�@�@�@�@�@�@����� � � � �������6�6�6�6�6�6�������%�%�%�%�%�%��'�&�&�&�&�&�&� ��H� � �� ��<@���� 9�@�@�@�>B� �(�7�?�";�B�B�B���������46�6�6�&j�j�j����*���*"�D�"�"�"�"�*/�S�/�/�/�/�"�"�"�"�"�"�"�"�D �&�-�/�/��(*��d�3��9�o�*�*�*� �)� �� � � � �B�B�s�B�C�B�B�B�B�/�/�/�d � �c� � � � �(��8��h�s�m��WZ�����'+�)-� %� *"�*"� �*"� �*"� ��c��N�*"���c��#� *"� �D��I�&� *"� � *"� �#�s�(�^�*"�*"�*"�*"�^'+�)-�-1� %�!� f�f� �f���c��#�f��D��I�&�f� ��S� �*� f� � f� �c�]� f�f�f�f�f�fr"
Memory