� B�gn����UdZddlmZddlmZddlmZmZddlmZddl Z ddl Z ddl Z ddl m Z mZmZmZmZmZddlZddlZddlmZdd lmZdd lmZdd lmZmZmZm Z dd l!m"Z"m#Z#dd l$m%Z%ddl&m'Z'ddl(m)Z)m*Z*m+Z+ddl,m-Z-ddl.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5ddl6m7Z7ddl8m9Z9ddl:m;Z;ddl<m=Z=ddl>m?Z?ddl@mAZAer$ddlBmCZCmDZDddlEmFZFddl mGZGddlHmIZImJZJmKZKmLZLmMZMmNZNdZOdZPdZQdZRd ZSd!ZTd"eP�d#eQ�d#eR�d#eS�d#e?d$d%z�d#e?d&�d'eT�d(�ZUd)eP�d#eQ�d*�ZVd+eP�d#eQ�d#eR�d#e?d$�d#e?d&�d,eT�d#� ZWgd-�ZXed.d/d/��ZYd0eZd1<d�d7�Z[d�d8�Z\d9Z]d0eZd:<d;Z^d0eZd<<d=Z_d0eZd><d?Z`d0eZd@<dAZad0eZdB<d�dE�ZbGdF�dG��ZcGdH�dIec��ZdGdJ�dK��ZeGdL�dM��ZfGdN�dOefejg��Zhe"eU��dPdPddQdPddPddQdRddS� d�df���Zid�dh�Zjd�dm�Zkd�do�Zld�ds�Zmd�dv�Zn d�d�dz�Zoe#e?d&e?d{d|z�}��Gd~�def����Zpd�d��Zqd�d��ZrGd��d���ZsGd��d�ep��ZtGd��d�et��ZudS)�a� Module contains tools for processing Stata files into DataFrames The StataReader below was originally written by Joe Presbrey as part of PyDTA. It has been extended and improved by Skipper Seabold from the Statsmodels project who also developed the StataWriter and was finally added to pandas in a once again improved version. You can find more information on http://presbrey.mit.edu/PyDTA and https://www.statsmodels.org/devel/ �)� annotations)�abc)�datetime� timedelta)�BytesION)�IO� TYPE_CHECKING�AnyStr�Callable�Final�cast)�lib)� infer_dtype)�max_len_string_array)�CategoricalConversionWarning�InvalidColumnName�PossiblePrecisionLoss�ValueLabelTypeMismatch)�Appender�doc)�find_stack_level)�ExtensionDtype)� ensure_object�is_numeric_dtype�is_string_dtype)�CategoricalDtype)� Categorical� DatetimeIndex�NaT� Timestamp�isna� to_datetime� to_timedelta)� DataFrame)�Index)� RangeIndex)�Series)� _shared_docs)� get_handle)�Hashable�Sequence)� TracebackType)�Literal)�CompressionOptions�FilePath� ReadBuffer�Self�StorageOptions� WriteBufferz�Version of given Stata file is {version}. pandas supports importing versions 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16),and 119 (Stata 15/16, over 32,767 variables).z�convert_dates : bool, default True Convert date variables to DataFrame time values. convert_categoricals : bool, default True Read value labels and convert columns to Categorical/Factor variables.aindex_col : str, optional Column to set as index. convert_missing : bool, default False Flag indicating whether to convert missing values to their Stata representations. If False, missing values are replaced with nan. If True, columns containing missing values are returned with object data types and missing values are represented by StataMissingValue objects. preserve_dtypes : bool, default True Preserve Stata datatypes. If False, numeric data are upcast to pandas default types for foreign data (float64 or int64). columns : list or None Columns to retain. Columns will be returned in the given order. None returns all columns. order_categoricals : bool, default True Flag indicating whether converted categorical data are ordered.zzchunksize : int, default None Return StataReader object for iterations, returns chunks with given number of lines.z=iterator : bool, default False Return StataReader object.z�Notes ----- Categorical variables read through an iterator may not have the same categories and dtype. This occurs when a variable stored in a DTA file is associated to an incomplete set of value labels that only label a strict subset of the values.a> Read Stata file into DataFrame. Parameters ---------- filepath_or_buffer : str, path object or file-like object Any valid string path is acceptable. The string could be a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is expected. A local file could be: ``file://localhost/path/to/table.dta``. If you want to pass in a path object, pandas accepts any ``os.PathLike``. By file-like object, we refer to objects with a ``read()`` method, such as a file handle (e.g. via builtin ``open`` function) or ``StringIO``. � �decompression_options�filepath_or_buffer�storage_optionsz� Returns ------- DataFrame or pandas.api.typing.StataReader See Also -------- io.stata.StataReader : Low-level reader for Stata data files. DataFrame.to_stata: Export Stata data files. a Examples -------- Creating a dummy stata for this example >>> df = pd.DataFrame({'animal': ['falcon', 'parrot', 'falcon', 'parrot'], ... 'speed': [350, 18, 361, 15]}) # doctest: +SKIP >>> df.to_stata('animals.dta') # doctest: +SKIP Read a Stata dta file: >>> df = pd.read_stata('animals.dta') # doctest: +SKIP Read a Stata dta file in 10,000 line chunks: >>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP >>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP >>> df.to_stata('filename.dta') # doctest: +SKIP >>> with pd.read_stata('filename.dta', chunksize=10000) as itr: # doctest: +SKIP >>> for chunk in itr: ... # Operate on a single chunk, e.g., chunk.mean() ... pass # doctest: +SKIP z�Reads observations from Stata file, converting them into a dataframe Parameters ---------- nrows : int Number of lines to read from data file, if None read whole file. z Returns ------- DataFrame z�Class for reading Stata dta files. Parameters ---------- path_or_buf : path (string), buffer or path object string, path object (pathlib.Path or py._path.local.LocalPath) or object implementing a binary read() functions. z ) �%tc�%tC�%td�%d�%tw�%tm�%tq�%th�%ty��r � stata_epoch�datesr'�fmt�str�returnc���������tjjtjjc��tjt ddd��z j�tjt ddd��z j��dzdzdz��dzdzdz�d$��fd� }d$��fd � }d$����fd � }t j|��}d }|���r d }d |j |<|� t j ��}|� d��rt}|}|||d��} �n�|� d��rFtjdt!�����t#|t$���} |r t&| |<| S|� d��rt}|} ||| d��} �nB|� d��r(tj|dzz} |dzdz} || | ��} �n|� d��r'tj|dzz} |dzdz} || | ��} n�|� d��r*tj|dzz} |dzdzdz} || | ��} n�|� d��r*tj|dzz} |dzd zdz} || | ��} nK|� d!��r#|} t j|��}|| |��} nt+d"|�d#����|r t&| |<| S)%a Convert from SIF to datetime. https://www.stata.com/help.cgi?datetime Parameters ---------- dates : Series The Stata Internal Format date to convert to datetime according to fmt fmt : str The format to convert to. Can be, tc, td, tw, tm, tq, th, ty Returns Returns ------- converted : Series The converted dates Examples -------- >>> dates = pd.Series([52]) >>> _stata_elapsed_date_to_datetime_vec(dates , "%tw") 0 1961-01-01 dtype: datetime64[ns] Notes ----- datetime/c - tc milliseconds since 01jan1960 00:00:00.000, assuming 86,400 s/day datetime/C - tC - NOT IMPLEMENTED milliseconds since 01jan1960 00:00:00.000, adjusted for leap seconds date - td days since 01jan1960 (01jan1960 = 0) weekly date - tw weeks since 1960w1 This assumes 52 weeks in a year, then adds 7 * remainder of the weeks. The datetime value is the start of the week in terms of days in the year, not ISO calendar weeks. monthly date - tm months since 1960m1 quarterly date - tq quarters since 1960q1 half-yearly date - th half-years since 1960h1 yearly date - ty years since 0000 rArB�i��rGr'c���|����kr/|����krtd|z|zd���St|dd��}t d�t ||��D��|���S)z� Convert year and month to datetimes, using pandas vectorized versions when the date range falls within the range supported by pandas. Otherwise it falls back to a slower but more robust method using datetime. �dz%Y%m��format�indexNc�6�g|]\}}t||d����S�rB)r)�.0�y�ms �_/home/asafur/pinokio/api/open-webui.git/app/env/lib/python3.11/site-packages/pandas/io/stata.py� <listcomp>zX_stata_elapsed_date_to_datetime_vec.<locals>.convert_year_month_safe.<locals>.<listcomp>,s(��J�J�J���A�8�A�q�!�,�,�J�J�J��rO)�max�minr"�getattrr'�zip)�year�monthrO�MAX_YEAR�MIN_YEARs ��rU�convert_year_month_safezD_stata_elapsed_date_to_datetime_vec.<locals>.convert_year_month_safe!s���� �8�8�:�:�� � �T�X�X�Z�Z�(�%:�%:��s�T�z�E�1�&�A�A�A� A��D�'�4�0�0�E��J�J��T�5�9I�9I�J�J�J�RW�X�X�X� XrWc�(��|����dz kr;|����kr#t|d���t|d���zSt |dd��}d�t ||��D��}t ||� ��S) z{ Converts year (e.g. 1999) and days since the start of the year to a datetime or datetime64 Series rB�%YrM�d��unitrONc�r�g|]4\}}t|dd��tt|�����z��5S)rB��days)rr�int)rRrSrds rUrVzW_stata_elapsed_date_to_datetime_vec.<locals>.convert_year_days_safe.<locals>.<listcomp>7sK�����?C�q�!���A�q�!�!�I�3�q�6�6�$:�$:�$:�:���rWrX)rYrZr"r#r[r\r')r]rirO�valuer_r`s ��rU�convert_year_days_safezC_stata_elapsed_date_to_datetime_vec.<locals>.convert_year_days_safe.s���� �8�8�:�:��A�� &� &�4�8�8�:�:��+@�+@��t�D�1�1�1�L��C�4P�4P�4P�P� P��D�'�4�0�0�E���GJ�4�QU������E��%�u�-�-�-� -rWc�����t|dd��}|dkrP|����ks|����kr�fd�|D��}t||���Sne|dkrP|����ks|����kr�fd�|D��}t||���Snt d���t ����t ||� ��}�|zS) z� Convert base dates and deltas to datetimes, using pandas vectorized versions if the deltas satisfy restrictions required to be expressed as dates in pandas. rONrdc�P��g|]"}�tt|�����z��#S)rh�rrj�rRrd�bases �rUrVzS_stata_elapsed_date_to_datetime_vec.<locals>.convert_delta_safe.<locals>.<listcomp>Es/���H�H�H�A�$���A���!7�!7�!7�7�H�H�HrWrX�msc�V��g|]%}�tt|��dz���z��&S)rJ)� microsecondsrorps �rUrVzS_stata_elapsed_date_to_datetime_vec.<locals>.convert_delta_safe.<locals>.<listcomp>Is@������GH�D�9�3�q�6�6�D�=�B�B�B�B���rWzformat not understoodre)r[rYrZr'� ValueErrorr"r#) rq�deltasrfrO�values� MAX_DAY_DELTA� MAX_MS_DELTA� MIN_DAY_DELTA� MIN_MS_DELTAs ` ����rU�convert_delta_safez?_stata_elapsed_date_to_datetime_vec.<locals>.convert_delta_safe<s$���� ����.�.�� �3�;�;��z�z�|�|�m�+�+�v�z�z�|�|�m�/K�/K�H�H�H�H��H�H�H���f�E�2�2�2�2�0L��T�\�\��z�z�|�|�l�*�*�f�j�j�l�l�\�.I�.I�����LR������f�E�2�2�2�2� /J� �4�5�5� 5��4� � ���f�4�0�0�0���f�}�rWFTg�?�r8�tcrr�r9�tCz9Encountered %tC format. Leaving in Stata Internal Format.�� stacklevel��dtype)r:�tdr;rdrd�r<�tw�4��r=�tm� �r>�tq���r?�th���r@�tyz Date fmt � not understood)rGr')r rZr]rYrri�np�isnan�any�_values�astype�int64� startswithrC�warnings�warnrr'�objectr� ones_likeru)rDrErarlr|�bad_locs�has_bad_valuesrqrr� conv_datesrir]r^� quarter_month� first_monthrxryr_rzr{r`s @@@@@@rU�#_stata_elapsed_date_to_datetime_vecr��s���������\#��+�Y�]�-?��H�h��]�X�d�A�q�%9�%9�9�?�M��]�X�d�A�q�%9�%9�9�?�M� �2�%��,�t�3�L� �2�%��,�t�3�L� Y� Y� Y� Y� Y� Y� Y� .� .� .� .� .� .� .����������2�x����H��N��|�|�~�~�&���"%�� �h�� �L�L��� "� "�E� �~�~�m�$�$�);��� ��'�'��b�$�7�7� � � ��� � &� &�%;�� � G�'�)�)� � � � ��E��0�0�0� � � '�#&�J�x� ��� ���0� 1� 1�;�����'�'��d�C�8�8� � � ��� � &� &�;���%�2�+�-���� �a���+�+�D�$�7�7� � � ��� � &� &�;���%�2�+�-�����q� ��,�,�T�5�9�9� � � ��� � &� &� ;���%�1�*�,�����a��!�+� �,�,�T�=�A�A� � � ��� � &� &� ;���%�1�*�,�����a��!�#��,�,�T�5�9�9� � � ��� � &� &�;����l�5�)�)� �,�,�T�;�?�?� � ��9�S�9�9�9�:�:�:��#�"� �8�� �rWc���� �|j� d��dz � d)d*��� fd � }t|��}|j� |���rFtj|jd ��rt t��|j|<nt|j|<|d vr||d ���}|j dz }�n`|dvr&tj dt�����|}�n6|dvr||d ���}|j �z}�n|dvr1||d d ���}d|j tj z z|jdzz}n�|dvr0||d ���}d|j tj z z|jzdz }n�|dvr3||d ���}d|j tj z z|jdz dzz}ny|dvrI||d ���}d|j tj z z|jd k�t"��z}n,|d!vr||d ���}|j }nt%d"|�d#����t'|t(jd�$��}t-jd%d&��d'}|||<t'|� d�(��S)+aO Convert from datetime to SIF. https://www.stata.com/help.cgi?datetime Parameters ---------- dates : Series Series or array containing datetime or datetime64[ns] to convert to the Stata Internal Format given by fmt fmt : str The format to convert to. Can be, tc, td, tw, tm, tq, th, ty l�"R:rJFrDr'�delta�boolr]ric����i}tj|jd��r�|rT|tt���d��z }|j�tj ��dz|d<|s|r-t|��}|j j |d<|j j |d<|rf|j�tj ��t|dd���j�tj ��z }|� z|d <n�t|d � ��d kr�|r7|jtz }d� fd� }tj|��} | |��|d<|r9|�d���} | jdz|d<| j|ddzz |d<|r&dd�} tj| ��} | |��|d <nt%d���t'|����S)N�M�nsrJr�r]r^rcrMriF��skipnar�xrrG�floatc�>���|jzd|jzz|jzS)Ni@B)ri�secondsrt)r�� US_PER_DAYs �rU�fzC_datetime_to_stata_elapsed_vec.<locals>.parse_dates_safe.<locals>.f�s#���%���.��1�9�1D�D�q�~�U�UrWc�&�d|jz|jzS)NrL)r]r^�r�s rU�<lambda>zJ_datetime_to_stata_elapsed_vec.<locals>.parse_dates_safe.<locals>.<lambda>�s��3���<�!�'�3I�rWrLrjc�>�|t|jdd��z jS)NrB)rr]rir�s rU�gzC_datetime_to_stata_elapsed_vec.<locals>.parse_dates_safe.<locals>.g�s�������A� 6� 6�6�<�<rWzQColumns containing dates must contain either datetime64, datetime or null values.rX)r�rrGr�)r�rrGrj)r� is_np_dtyper�r rC�as_unitr��viewr�r�r�_datar]r^r"r� vectorize�applyrur$)rDr�r]rird� time_delta� date_index� days_in_nsr��v� year_monthr�� NS_PER_DAYr�rOs ���rU�parse_dates_safez8_datetime_to_stata_elapsed_vec.<locals>.parse_dates_safe�s#��� �� �?�5�;�� ,� ,�& �� G�"�Y�{�%;�%;�%C�%C�D�%I�%I�I� �'�/�4�4�R�X�>�>�$�F��'� �� 4�t� 4�*�5�1�1� �&�,�1��&� �'�-�3��'� �� 5�"�]�/�/���9�9�K��f�I�d�=�=�=��$�$�r�x�.�.�)� �'�*�4��&� �� ��u� -� -� -�� ;� ;�� &�� � �3��V�V�V�V�V�V��L��O�O���Q�u�X�X��'� �� B�"�[�[�)I�)I�J�J� �&�.�#�5��&� �'�/�!�F�)�c�/�A��'� �� %�=�=�=�=��L��O�O���A�e�H�H��&� ���7��� � ��%�(�(�(�(rWr�r}T)r�rz'Stata Internal Format tC not supported.r�)r:r�r�)r]rir�r�r�)r]r�rBr�r�r�r�r�r�r��Format z! is not a known Stata date format)r��copy�<d��r�rOr�)FFF)rDr'r�r�r]r�rir�)rOr!r�rr�r�r"rCr�r�r�r�rr]rir^r�rjrur'r��float64�struct�unpack) rDrEr��bad_locrdr�� missing_valuer�r�rOs @@@rU�_datetime_to_stata_elapsed_vecr��s������ �K�E�/�J��d�"�J�NS�,)�,)�,)�,)�,)�,)�,)�,)�,)�\�5�k�k�G� �K�E��{�{�}�}�1� �?�5�;�� ,� ,� 1�%0��%=�%=�E�M�'� "� "�%0�E�M�'� "� �m��� � �U�$� /� /� /���W�t�^� � � � � � �� � 5�'�)�)� � � � �� � � � � � � � �U�$� /� /� /���W� �*� � � � � � � � �U��D� 9� 9� 9���1�6�K�$4�4�5���!� �C� � � � � � � � �U�� .� .� .���1�6�K�$4�4�5���?�!�C� � � � � � � � �U�� .� .� .���!�&�;�#3�3�4���!� ��7I�I� � � � � � � � �U�� .� .� .���!�&�;�#3�3�4���!� �7K�7K�C�7P�7P�P� � � � � � � � �U�� .� .� .���V� � ��I�3�I�I�I�J�J�J�� �"�*�5�A�A�A�J��M�$�(K�L�L�Q�O�M�'�J�w�� �*�E�� 6� 6� 6�6rWz� Fixed width strings in Stata .dta files are limited to 244 (or fewer) characters. Column '{0}' does not satisfy this restriction. Use the 'version=117' parameter to write the newer (Stata 13 and later) format. �excessive_string_length_errorz� Column converted from {0} to {1}, and some data are outside of the lossless conversion range. This may result in a loss of precision in the saved data. �precision_loss_docz� Stata value labels (pandas categories) must be strings. Column {0} contains non-string labels which will be converted to strings. Please check that the Stata data file created has not lost information due to duplicate labels. �value_label_mismatch_doca; Not all pandas column names were valid Stata variable names. The following replacements have been made: {0} If this is not what you expect, please make sure you have Stata-compliant column names in your DataFrame (strings only, max 32 characters, only alphanumerics and underscores, no Stata reserved words) �invalid_name_doca� One or more series with value labels are not fully labeled. Reading this dataset with an iterator results in categorical variable with different categories. This occurs since it is not possible to know all possible values until the entire dataset has been read. To avoid this warning, you can either read dataset without an iterator, or manually convert categorical data by ``convert_categoricals`` to False and then accessing the variable labels through the value_labels method of the reader. �categorical_conversion_warning�datar$c � �d}tjtjtjftjtjtjftjtjtjftjtjtjftj tjtj ff}tj dd��d}tj dd��d}|D�]G}t||jt��o||jjdv}||���}|rZ||jjdvrdnd }||�|���||jj��||<n�t||jt��r�t+||jd d ���/||�||jj��||<n8t-||j��r||�d ��||<||j} |jddk} |D]�} | | dkr�| s;||���tj| d ��jkr | d } n| d} | dtjkr9||���dkrt4�dd��}||�| ��||<��| tjkrh| sf||���dks||���dkr(||�tj��||<�nQ| tjkrh| sf||���dks||���dkr(||�tj��||<�n�| tjkr�| s<||���dkrH||���dkr*||�tj��||<�na||�tj ��||<||���dks||���dkrt4�dd��}n�| tjtj fvr�tj||�����rtAd|�d����||���} | tjkr/| |kr)||�tj ��||<n/| tj kr| |krtAd|�d| �d|�d����|rB|���r.tBj"||jj#} | |j$||f<��I|r(tKj&|tNtQ�����|S) a- Checks the dtypes of the columns of a pandas DataFrame for compatibility with the data types and ranges supported by Stata, and converts if necessary. Parameters ---------- data : DataFrame The DataFrame to check and convert Notes ----- Numeric columns in Stata must be one of int8, int16, int32, float32 or float64, with some additional value restrictions. int8 and int16 columns are checked for violations of the value restrictions and upcast if needed. int64 data is not usable in Stata, and so it is downcast to int32 whenever the value are in the int32 range, and sidecast to float64 when larger than this range. If the int64 values are outside of the range of those perfectly representable as float64 values, a warning is raised. bool columns are cast to int8. uint columns are converted to int of the same size if there is no loss in precision, otherwise are upcast to a larger type. uint64 is currently not supported since it is concerted to object in a DataFrame. ��<f����~rr����������iub�iuF� numpy_dtypeNr�rBr�l�uint64r�rL����������������l����r�zColumn zM contains infinity or -infinitywhich is outside the range supported by Stata.z has a maximum value (z() outside the range supported by Stata (�)r�))r��bool_�int8�uint8�int16�uint16�int32�uint32r�r�r�r�r�� isinstancer�r�kindr!�fillnar�r�r[r�shaperY�iinfor�rNrZ�float32�isinfr�ru�StataMissingValue�BASE_MISSING_VALUES�name�locr�r�rr)r��ws�conversion_data� float32_max� float64_max�col�is_nullable_int� orig_missing�fvr��empty_df�c_datark�sentinels rU�_cast_to_stata_typesr#s���4 �B� ��2�7�B�G�$� ��2�7�B�H�%� ��B�H�b�h�'� ��B�H�b�h�'� ��B�H�b�j�)�  � ��-��&9�:�:�1�=�K��-��&I�J�J�1�M�K��D7�D7�� �t�C�y��� 7� 7� .��S� ��$��-� � �C�y�~�~�'�'� � � 7��3�i�o�*�d�2�2����B��S� �(�(��,�,�3�3�D��I�O�4O�P�P�D��I�I� ��S� ��� 8� 8� 7��t�C�y�� �t�<�<�H� ��I�,�,�T�#�Y�_�-H�I�I��S� � � ��c���1�1� 7� ��I�,�,�X�6�6��S� ��S� ����:�a�=�A�%��%� 4� 4�F���q� �!�!��&�t�C�y�}�}���"�(�6�!�9�2E�2E�2I�I�I�"�1�I�E�E�"�1�I�E��!�9���(�(��C�y�}�}���%�/�/�/�6�6�x��K�K�� ��I�,�,�U�3�3��S� �� �B�G� � �H� ��C�y�}�}����$�$��S� � � ���$�(>�(>� ��I�,�,�R�X�6�6��S� �� �b�h� � �x� ��C�y�}�}����&�&�$�s�)�-�-�/�/�F�*B�*B� ��I�,�,�R�X�6�6��S� �� �b�h� � �� G��S� � � ���:�-�-�$�s�)�-�-�/�/�[�2P�2P� ��I�,�,�R�X�6�6��S� � � ��I�,�,�R�Z�8�8��S� ���9�=�=�?�?�e�+�+�t�C�y�}�}���(�/J�/J�+�2�2�7�I�F�F�B�� �r�z�2�:�.� .� .��x��S� �"�"�&�&�(�(� � �E�c�E�E�E������I�M�M�O�O�E��� �"�"�u�{�':�':� ��I�,�,�R�Z�8�8��S� � ��"�*�$�$��;�&�&�$�>�#�>�>�U�>�>�/:�>�>�>���� � 7����!�!� 7�,�@��c���AU�V��.6����s�*�+�� � �� � � !�'�)�)� � � � � �KrWc�.�eZdZdZ ddd �Zdd �Zdd�ZdS)�StataValueLabelz� Parse a categorical column and prepare formatted output Parameters ---------- catarray : Series Categorical Series to encode encoding : {"latin-1", "utf-8"} Encoding to use for value labels. �latin-1�catarrayr'�encoding�Literal['latin-1', 'utf-8']rG�Nonec��|dvrtd���|j|_||_|jj}t |��|_|���dS)N�r �utf-8�%Only latin-1 and utf-8 are supported.) rur��labname� _encoding�cat� categories� enumerate� value_labels�_prepare_value_labels)�selfr r rs rU�__init__zStataValueLabel.__init__�sa�� �/� /� /��D�E�E� E��}�� �!����\�,� �%�j�1�1��� �"�"�$�$�$�$�$rWc��d|_g|_d|_tjgtj���|_tjgtj���|_d|_g}g}|j D�] }|d}t|t��sTt|��}tj t�|j��t"t%�����|�|j��}|�|j��|xjt|��dzz c_|�|d��|j�|��|xjdz c_��|jdkrt-d���tj|tj���|_tj|tj���|_dd|jzzd|jzz|jz|_d S) zEncode value labels.rr�rBr�i}zaStata value labels for a single variable must have a combined length less than 32,000 characters.�r�N)�text_len�txt�nr��arrayr��off�val�lenrr�rFr�r�r�rNrrr�encoder�appendru)r�offsetsrw�vl�categorys rUrz%StataValueLabel._prepare_value_labels�s����� � "�������8�B�b�h�/�/�/����8�B�b�h�/�/�/������ �� ���#� � �B�$&�q�E�H��h��,�,� ��x�=�=��� �,�3�3�D�L�A�A�*�/�1�1�����  ���t�~�6�6�H� �N�N�4�=� )� )� )� �M�M�S��]�]�Q�.� .�M�M� �M�M�"�Q�%� � � � �H�O�O�H� %� %� %� �F�F�a�K�F�F�F� �=�5� � ��F��� � �8�G�2�8�4�4�4����8�F�"�(�3�3�3����1�t�v�:�%��D�F� �2�T�]�B����rW� byteorderrF�bytesc���|j}t��}d}|�tj|dz|j����t |j��dd��|��}|dvrdnd}t||dz��}|�|��td��D]*}|�tjd |�����+|�tj|dz|j ����|�tj|dz|j ����|j D]-}|�tj|dz|�����.|jD]-} |�tj|dz| �����.|jD]} |�| |z���|���S) a! Generate the binary representation of the value labels. Parameters ---------- byteorder : str Byte order of the output Returns ------- value_label : bytes Bytes containing the formatted value label ��iN� )r�utf8�rBr��c)rr�writer��packr#rFrr$� _pad_bytes�rangerrr!r"r�getvalue) rr)r �bio� null_byter�lab_lenr-�offsetrk�texts rU�generate_value_labelz$StataValueLabel.generate_value_label�s����>���i�i��� � � � �&�+�i�#�o�t�x�8�8�9�9�9��d�l�#�#�C�R�C�(�/�/��9�9�� �(9�9�9�"�"�s���W�g��k�2�2�� � � �'�����q��� 3� 3�A� �I�I�f�k�#�y�1�1� 2� 2� 2� 2� � � �&�+�i�#�o�t�v�6�6�7�7�7� � � �&�+�i�#�o�t�}�=�=�>�>�>��h� <� <�F� �I�I�f�k�)�c�/�6�:�:� ;� ;� ;� ;��X� ;� ;�E� �I�I�f�k�)�c�/�5�9�9� :� :� :� :��H� (� (�D� �I�I�d�Y�&� '� '� '� '��|�|�~�~�rWN�r )r r'r r rGr �rGr )r)rFrGr*)�__name__� __module__� __qualname__�__doc__rrr<�rWrUrr�sk������ � �IR� %� %� %� %� %�*C�*C�*C�*C�X2�2�2�2�2�2rWrc��eZdZdZ d dd �Zd S)�StataNonCatValueLabela  Prepare formatted version of value labels Parameters ---------- labname : str Value label name value_labels: Dictionary Mapping of values to labels encoding : {"latin-1", "utf-8"} Encoding to use for value labels. r rrFr�dict[float, str]r r rGr c���|dvrtd���||_||_t|���d����|_|���dS)Nrrc��|dS)NrrCr�s rUr�z0StataNonCatValueLabel.__init__.<locals>.<lambda>2s ���!��rW)�key)rurr�sorted�itemsrr)rrrr s rUrzStataNonCatValueLabel.__init__&sq�� �/� /� /��D�E�E� E��� �!���"� � � � � �n�n� � � ��� �"�"�$�$�$�$�$rWNr=)rrFrrFr r rGr )r?r@rArBrrCrWrUrErEs<������ � �"1:� %�%�%�%�%�%�%rWrEc��eZdZUdZiZded<dZded<eD]-Zdee<edd ��D]Z de d e z��zee ez<��.d Z d ed <e j dd��dZded<ed ��D]zZ e j de ��dZdee<e dkreexxe d e z��z cc<e j de jde����dezZe jde��Z �{dZd ed<e j dd��dZed ��D]zZ e j de��dZdee<e dkreexxe d e z��z cc<e j de jde����dezZe jde��Z�{ddde j de ��de j de��dd�Zded<d0d"�Zed1d$���Zed2d%���Zd1d&�Zd1d'�Zd3d+�Zed4d.���Zd/S)5r�a� An observation's missing value. Parameters ---------- value : {int, float} The Stata missing value code Notes ----- More information: <https://www.stata.com/help.cgi?missing> Integer missing values make the code '.', '.a', ..., '.z' to the ranges 101 ... 127 (for int8), 32741 ... 32767 (for int16) and 2147483621 ... 2147483647 (for int32). Missing values for floating point data types are more complex but the pattern is simple to discern from the following table. np.float32 missing values (float in Stata) 0000007f . 0008007f .a 0010007f .b ... 00c0007f .x 00c8007f .y 00d0007f .z np.float64 missing values (double in Stata) 000000000000e07f . 000000000001e07f .a 000000000002e07f .b ... 000000000018e07f .x 000000000019e07f .y 00000000001ae07f .z rF�MISSING_VALUES)�e������r �bases�.rB��`�r*� float32_basez<isrrj� increment_32r�r�� float64_base�qsr�rNrOrP)r�r�r�r�r�r�rkr�rGr c��||_|dkrt|��nt|��}|j||_dS)Nl)�_valuerjr�rM�_str�rrks rUrzStataMissingValue.__init__�s;���� �#�j�0�0��E� � � �e�E�l�l���'��.�� � � rWrFc��|jS)z� The Stata representation of the missing value: '.', '.a'..'.z' Returns ------- str The representation of the missing value. )r\�rs rU�stringzStataMissingValue.string�s ���y�rWc��|jS)z� The binary representation of the missing value. Returns ------- {int, float} The binary representation of the missing value. )r[r_s rUrkzStataMissingValue.value�s ���{�rWc��|jS�N)r`r_s rU�__str__zStataMissingValue.__str__�s ���{�rWc�,�t|���d|�d�S)N�(r�)�typer_s rU�__repr__zStataMissingValue.__repr__�s���t�*�*�&�&�t�&�&�&�&rW�otherr�r�c�|�t|t|����o|j|jko|j|jkSrc)r�rgr`rk)rris rU�__eq__zStataMissingValue.__eq__�s<�� �u�d�4�j�j� )� )� *�� �u�|�+� *�� �e�k�)� rWr��np.dtypec�n�|jtjur|jd}n�|jtjur|jd}nr|jtjur|jd}nQ|jtjur|jd}n0|jtjur|jd}ntd���|S)Nr�r�r�r�r�zUnsupported dtype) rgr�r�r�r�r�r�r�ru)�clsr�rks rU�get_base_missing_valuez(StataMissingValue.get_base_missing_value�s��� �:��� � ��+�F�3�E�E� �Z�2�8� #� #��+�G�4�E�E� �Z�2�8� #� #��+�G�4�E�E� �Z�2�:� %� %��+�I�6�E�E� �Z�2�:� %� %��+�I�6�E�E��0�1�1� 1�� rWN)rkr�rGr �rGrF)rGr�)rir�rGr�)r�rlrGr�)r?r@rArBrM�__annotations__rQ�br5r-�chrrVr�r�rWrIr3� int_valuerX� increment_64r�r�propertyr`rkrdrhrk� classmethodrorCrWrUr�r�7s\�������"�"�J(*�N�)�)�)�)�+�E�+�+�+�+� �6�6����q����q�"��� 6� 6�A�$'�#�#�b�1�f�+�+�$5�N�1�q�5� !� !� 6�.�L�-�-�-�-�%�� �d�,?�@�@��C�L�C�C�C�C� �U�2�Y�Y�4�4���f�m�D�,�/�/��2��!��s�� �q�5�5� �3� � � �3�3�r�A�v�;�;� .� � � �!�F�M�$� �� �D�#�(>�(>�?�?��B�\�Q� �"�v�{�4��3�3� � �=�L�=�=�=�=� �6�=��&I�J�J�1�M�L� �U�2�Y�Y�3�3���f�m�D�,�/�/��2��!��s�� �q�5�5� �3� � � �3�3�r�A�v�;�;� .� � � �!�F�M�#�{�v�{�4��'=�'=�>�>�q�A�L�P� �"�v�{�3� �2�2� � ���� �6�=��|�4�4�Q�7� �6�=��|�4�4�Q�7� "�"������/�/�/�/� � � � ��X� �� � � ��X� �����'�'�'�'� � � � �� � � ��[� � � rWr�c��eZdZdd�ZdS)� StataParserrGr c �6�td�tdd��D��dtjtj��fdtjtj��fdtjtj��fdtjtj��fdtjtj��fgz��|_ tjtj ��tjtj��tjtj��tjtj��tjtj��tjtj��d �|_ tttd����td ��z��|_d d d dddd �|_d}d}d}d}dddtjt!jd|��d��tjt!jd|��d��ftjt!jd|��d��tjt!jd|��d��fd�|_dddddd�|_dddtjt!jdd ��d��tjt!jdd!��d��d�|_d"d#d$d%d&d'd(�|_hd)�|_dS)*Nc�@�g|]}|tjd|����f��S)�S)r�r�)rRr-s rUrVz(StataParser.__init__.<locals>.<listcomp>�s-�� ;� ;� ;��a���'�a�'�'�"�"� #� ;� ;� ;rWrB������������)������������������bhlfd�Qrdr��l�hrrs����r�s��������r�)r�rL)r�r�)r�r�r�rr�)rrr�r�r�rd)�b�i�l�frLrNrOrPrUr��i1�i2�i4�f4�f8�u8)rrr�r�r�rdr�><�str#�_N�_b�_n�do�if�in�_pi�_rc�_se�end�forrj�NULL�_all�byte�case�else�enum�goto�long�quad�strL�with�_coef�_cons�_pred�_skipr �break�catch�class�constr��local�short�using�delete�double�export�friend�global�inline�pragma�boolean�complex�default�typedef�virtual�continue�delegate�explicit�external�function�typename� aggregate� colvector� eltypedef� protected� rowvector)�dictr5r�r�r�r�r�r�r�� DTYPE_MAPr�� DTYPE_MAP_XML�list�tuple�TYPE_MAP� TYPE_MAP_XMLr�r�� VALID_RANGE�OLD_TYPE_MAPPINGrM�NUMPY_TYPE_MAP�RESERVED_WORDS)r� float32_minr�� float64_minr�s rUrzStataParser.__init__�s��� � ;� ;�U�1�c�]�]� ;� ;� ;��b�h�r�w�'�'�(��b�h�r�x�(�(�)��b�h�r�x�(�(�)��b�h�r�z�*�*�+��b�h�r�z�*�*�+� � �  �  ����8�B�H�%�%��8�B�J�'�'��8�B�J�'�'��8�B�H�%�%��8�B�H�%�%��8�B�G�$�$� 3 �3 ����U�5��:�:�.�.��w���?�@�@�� ������� � ���*� �)� �9� �9� �� �*�� �6�=��{�;�;�A�>�?�?�� �6�=��{�;�;�A�>�?�?�� � �6�=��{�;�;�A�>�?�?�� �6�=��{�;�;�A�>�?�?��  �  �������� ! �! ��������F�M�$�0C�D�D�Q�G�H�H���� �d�$G�H�H��K���  � ���������  � ���= �= �= ����rWNr>)r?r@rArrCrWrUryry�s.������[ �[ �[ �[ �[ �[ rWryc����eZdZUeZded< dcdd�fd� Zded�Zded�Zdfd�Z dgd&�Z ded'�Z ded(�Z dhd*�Z dhd+�Zdhd,�Zdhd-�Zdhd.�Zdhd/�Zdhd0�Zdhd1�Zdid3�Zdjd6�Zded7�Zded8�Zdkd;�Zdld=�Zdld>�Zdld?�Zdld@�ZdhdA�ZdmdC�ZdmdD�Z dhdE�Z!dndG�Z"dodI�Z#dpdK�Z$dedL�Z%dedM�Z&dqdO�Z'drdsdQ�Z(e)e*�� dtdudT���Z+dvdV�Z,dwdW�Z-dxdY�Z.dyd]�Z/e0dmd^���Z1e0dmd_���Z2dzda�Z3d{db�Z4�xZ5S)|� StataReaderz IO[bytes]� _path_or_bufTNF�infer� path_or_buf�FilePath | ReadBuffer[bytes]� convert_datesr��convert_categoricals� index_col� str | None�convert_missing�preserve_dtypes�columns�Sequence[str] | None�order_categoricals� chunksize� int | None� compressionr.r7�StorageOptions | NonerGr c �*��t�����||_||_||_||_||_||_||_||_ | |_ | |_ d|_ | |_ d|_d|_|j �d|_ n*t!| t"��r| dkrt%d���d|_d|_d|_d|_d|_d|_d|_d|_t7t8j��|_dS)Nr�FrBrz.chunksize must be a positive integer when set.)�superr�_convert_dates�_convert_categoricals� _index_col�_convert_missing�_preserve_dtypes�_columns�_order_categoricals�_original_path_or_buf� _compression�_storage_optionsr� _chunksize�_using_iterator�_enteredr�rjru� _close_file�_missing_values�_can_read_value_labels�_column_selector_set�_value_labels_read� _data_read�_dtype� _lines_read�_set_endianness�sysr)�_native_byteorder) rr�r�r�r�r�r�r�r�r�r�r7� __class__s �rUrzStataReader.__init__as��� ��������,���%9��"�#��� /��� /����� �#5�� �%0��"�'��� /������#���$����� � �?� "��D�O�O��I�s�+�+� O�y�A�~�~��M�N�N� N�7;���$���&+��#�$)��!�"'������'+�� ����!0���!?�!?����rWc�R�t|d��s|���dSdS)zK Ensure the file has been opened and its header data read. r�N)�hasattr� _open_filer_s rU� _ensure_openzStataReader._ensure_open�s6���t�^�,�,� � �O�O� � � � � � � rWc�4�|js(tjdtt �����t |jd|jd|j���}t|j d��r2|j � ��r|j |_ |j |_nV|5t|j �����|_ ddd��n #1swxYwY|j j |_|���|���dS)z^ Open the file (with compression options, etc.), and read header information. zStataReader is being used without using a context manager. Using StataReader as a context manager is the only supported method.r��rbF)r7�is_textr��seekableN)r�r�r��ResourceWarningrr)r�r�r�r �handlerr��closer�r�read� _read_header� _setup_dtype)r�handless rUr zStataReader._open_file�sm���}� � �M�W��+�-�-�  � � � � � � &� � �1���)�  � � �� �7�>�:� .� .� 7�7�>�3J�3J�3L�3L� 7� '��D� �&�}�D� � �� C� C�$+�G�N�,?�,?�,A�,A�$B�$B��!� C� C� C� C� C� C� C� C� C� C� C���� C� C� C� C�#�0�6�D� � ������ �������s�,C�C�Cr1c��d|_|S)zenter context managerT)r�r_s rU� __enter__zStataReader.__enter__�s���� �� rW�exc_type�type[BaseException] | None� exc_value�BaseException | None� traceback�TracebackType | Nonec�@�|jr|���dSdSrc)r�)rrrr s rU�__exit__zStataReader.__exit__�s1�� � � � � � � � � � � � � rWc��tjdtt�����|jr|���dSdS)z�Close the handle if its open. .. deprecated: 2.0.0 The close method is not part of the public API. The only supported way to use StataReader is to use it as a context manager. z�The StataReader.close() method is not part of the public API and will be removed in a future version without notice. Using StataReader as a context manager is the only supported method.r�N)r�r�� FutureWarningrr�r_s rUrzStataReader.close�s`�� � � S� �'�)�)�  � � � � � � � � � � � � � � � � rWc�<�|jdkr d|_dSd|_dS)zC Set string encoding which depends on file version �vr rN)�_format_versionrr_s rU� _set_encodingzStataReader._set_encoding�s(�� � �#� %� %�&�D�N�N�N�$�D�N�N�NrWrjc�h�tjd|j�d����dS)NrrrBr�r�r�r�rr_s rU� _read_int8zStataReader._read_int8��*���}�S�$�"3�"8�"8��";�";�<�<�Q�?�?rWc�h�tjd|j�d����dS)N�BrBrr+r_s rU� _read_uint8zStataReader._read_uint8�r-rWc�x�tj|j�d�|j�d����dS)N�Hr�r�r�r�� _byteorderr�rr_s rU� _read_uint16zStataReader._read_uint16��5���}���2�2�2�D�4E�4J�4J�1�4M�4M�N�N�q�Q�QrWc�x�tj|j�d�|j�d����dS)N�Ir�rr3r_s rU� _read_uint32zStataReader._read_uint32�r6rWc�x�tj|j�d�|j�d����dS)Nr�rrr3r_s rU� _read_uint64zStataReader._read_uint64�r6rWc�x�tj|j�d�|j�d����dS)Nr�r�rr3r_s rU� _read_int16zStataReader._read_int16�r6rWc�x�tj|j�d�|j�d����dS)Nr-r�rr3r_s rU� _read_int32zStataReader._read_int32�r6rWc�x�tj|j�d�|j�d����dS)NrYrrr3r_s rU� _read_int64zStataReader._read_int64�r6rWr*c�h�tjd|j�d����dS)Nr1rBrr+r_s rU� _read_char8zStataReader._read_char8�r-rW�count�tuple[int, ...]c�z�tj|j�d|z��|j�d|z����S)Nr�r�r3)rrDs rU�_read_int16_countzStataReader._read_int16_count�sB���}��� -��e� � -� -� � � "� "�1�u�9� -� -� � � rWc��|���}|dkr|���dS|�|��dS)N�<)rC�_read_new_header�_read_old_header)r� first_chars rUrzStataReader._read_headersP���%�%�'�'� � �� � � � !� !� #� #� #� #� #� � !� !�*� -� -� -� -� -rWc���|j�d��t|j�d����|_|jdvr-t t �|j������|���|j�d��|j�d��dkrdnd|_|j�d ��|jd kr|� ��n|� ��|_ |j�d ��|� ��|_ |j�d ��|���|_|j�d ��|���|_|j�d��|j�d��|j�d��|���dz|_|���dz|_|���dz|_|���dz|_|���d z|_|���|_|j�d��|���dz|_|���d z|_|���dz|_|�|j��\|_|_|j� |j��|�!��|_"|j� |j��|�#|j dz��dd�|_$|j� |j��|�%��|_&|j� |j��|�'��|_(|j� |j��|�)��|_*dS)NrSr���ur'�w��version�sMSF�>�<�r'r�� ��r�� � r��rB�����)+r�rrjr(ru�_version_errorrNr)r4r5r9�_nvar� _get_nobs�_nobs�_get_data_label� _data_label�_get_time_stamp� _time_stamprA�_seek_vartypes�_seek_varnames�_seek_sortlist� _seek_formats�_seek_value_label_names�_get_seek_variable_labels�_seek_variable_labels�_data_location� _seek_strls�_seek_value_labels� _get_dtypes�_typlist� _dtyplist�seek� _get_varlist�_varlistrG�_srtlist� _get_fmtlist�_fmtlist� _get_lbllist�_lbllist�_get_variable_labels�_variable_labelsr_s rUrJzStataReader._read_new_header s��� ����r�"�"�"�"�4�#4�#9�#9�!�#<�#<�=�=��� � �� 6� 6��^�2�2�4�;O�2�P�P�Q�Q� Q� ������ ����r�"�"�"�!%�!2�!7�!7��!:�!:�f�!D�!D�#�#�#��� ����r�"�"�"�#'�#7�3�#>�#>�D� � � � � �D�DU�DU�DW�DW� � � ����q�!�!�!��^�^�%�%�� � ����r�"�"�"��/�/�1�1��� ����r�"�"�"��/�/�1�1��� ����r�"�"�"� ����q�!�!�!� ����q�!�!�!�"�.�.�0�0�2�5���"�.�.�0�0�2�5���"�.�.�0�0�2�5���!�-�-�/�/�!�3���'+�'7�'7�'9�'9�B�'>��$�&*�%C�%C�%E�%E��"� ����q�!�!�!�"�.�.�0�0�1�4����+�+�-�-��1���"&�"2�"2�"4�"4�r�"9���(,�(8�(8��9L�(M�(M�%�� �t�~� ����t�2�3�3�3��)�)�+�+�� � ����t�2�3�3�3��.�.�t�z�A�~�>�>�s��s�C�� � ����t�1�2�2�2��)�)�+�+�� � ����t�;�<�<�<��)�)�+�+�� � ����t�9�:�:�:� $� 9� 9� ;� ;����rW� seek_vartypes�,tuple[list[int | str], list[str | np.dtype]]c���|j�|��g}g}t|j��D]�}|���}|dkr8|�|��|�t |�����T |�|j|��|�|j|����#t$r}td|�d���|�d}~wwxYw||fS)N���cannot convert stata types [�]) r�rtr5r`r5r%rFr�r��KeyErrorru)rr~�typlist�dtyplist�_�typ�errs rUrqzStataReader._get_dtypesAs�� ����}�-�-�-������t�z�"�"� U� U�A��#�#�%�%�C��d�{�{����s�#�#�#�����C���)�)�)�)�U��N�N�4�#4�S�#9�:�:�:��O�O�D�$6�s�$;�<�<�<�<���U�U�U�$�%J�C�%J�%J�%J�K�K�QT�T�����U������ � s�AC � C,�C'�'C,� list[str]c�f����jdkrdnd���fd�t�j��D��S)Nr'�!�c�j��g|]/}���j��������0SrC��_decoder�r�rRr�rrrs ��rUrVz,StataReader._get_varlist.<locals>.<listcomp>X�6���S�S�S�A�� � �T�.�3�3�A�6�6�7�7�S�S�SrW�r(r5r`�rrrs`@rUruzStataReader._get_varlistUsA�����&��,�,�B�B�#��S�S�S�S�S��t�z�AR�AR�S�S�S�SrWc�����jdkrd�n�jdkrd�n�jdkrd�nd���fd�t�j��D��S) Nr'�9�q�1�hr�r�c�j��g|]/}���j��������0SrCr�r�s ��rUrVz,StataReader._get_fmtlist.<locals>.<listcomp>er�rWr�r�s`@rUrxzStataReader._get_fmtlist[so���� � �3� &� &��A�A� � !�C� '� '��A�A� � !�C� '� '��A�A��A�S�S�S�S�S��t�z�AR�AR�S�S�S�SrWc�����jdkrd�n�jdkrd�nd���fd�t�j��D��S)Nr'r�r�r�r\c�j��g|]/}���j��������0SrCr�r�s ��rUrVz,StataReader._get_lbllist.<locals>.<listcomp>or�rWr�r�s`@rUrzzStataReader._get_lbllisthsZ���� � �3� &� &��A�A� � !�C� '� '��A�A��A�S�S�S�S�S��t�z�AR�AR�S�S�S�SrWc�����jdkr!�fd�t�j��D��}nL�jdkr!�fd�t�j��D��}n �fd�t�j��D��}|S)Nr'c�j��g|]/}���j�d������0S)iAr��rRr�rs �rUrVz4StataReader._get_variable_labels.<locals>.<listcomp>ssC������>?�� � �T�.�3�3�C�8�8�9�9���rWr�c�j��g|]/}���j�d������0S)�Qr�r�s �rUrVz4StataReader._get_variable_labels.<locals>.<listcomp>w�C������=>�� � �T�.�3�3�B�7�7�8�8���rWc�j��g|]/}���j�d������0S)r.r�r�s �rUrVz4StataReader._get_variable_labels.<locals>.<listcomp>{r�rWr�)r�vlblists` rUr|z StataReader._get_variable_labelsqs���� � �3� &� &�����CH���CT�CT����G�G�� !�C� '� '�����BG�� �BS�BS����G�G�����BG�� �BS�BS����G��rWc�h�|jdkr|���S|���S)Nr')r(r;r9r_s rUrazStataReader._get_nobs�s4�� � �3� &� &��$�$�&�&� &��$�$�&�&� &rWrFc���|jdkrA|���}|�|j�|����S|jdkrA|���}|�|j�|����S|jdkr-|�|j�d����S|�|j�d����S)Nr'rOr�r�r.)r(r5r�r�rr,�r�strlens rUrczStataReader._get_data_label�s��� � �3� &� &��&�&�(�(�F��<�<�� 1� 6� 6�v� >� >�?�?� ?� � !�S� (� (��_�_�&�&�F��<�<�� 1� 6� 6�v� >� >�?�?� ?� � !�C� '� '��<�<�� 1� 6� 6�r� :� :�;�;� ;��<�<�� 1� 6� 6�r� :� :�;�;� ;rWc��|jdkrA|���}|j�|���d��S|jdkrA|���}|�|j�|����S|jdkr-|�|j�d����St ���)Nr'rrOr��)r(r,r�r�decoder�rur�s rUrezStataReader._get_time_stamp�s��� � �3� &� &��_�_�&�&�F��$�)�)�&�1�1�8�8��A�A� A� � !�S� (� (��_�_�&�&�F��<�<�� 1� 6� 6�v� >� >�?�?� ?� � !�C� '� '��<�<�� 1� 6� 6�r� :� :�;�;� ;��,�,� rWc���|jdkr2|j�d��|jd|jzzdzdzS|jdkr|���dzSt ���)NrOrr���r')r(r�rrkr`rArur_s rUrlz%StataReader._get_seek_variable_labels�sv�� � �3� &� &� � � "� "�1� %� %� %��/�2�� �?�C�b�H�2�M� M� � !�S� (� (��#�#�%�%��*� *��,�,� rWrLc����t|d���_�jdvr-tt��j��������������dkrdnd�_�����_�j � d���� ���_ �� ���_�����_�����_�jdkr*d��j � �j ��D��}n��j � �j ��}t'j|t&j� ��}g}|D]D}|�jvr!|��j|���,|�|d z ���E �fd �|D���_nE#t$r8}d �d �|D����}td|�d���|�d}~wwxYw �fd�|D���_nE#t$r8}d �d�|D����}td|�d���|�d}~wwxYw�jdkr&�fd�t7�j ��D���_n%�fd�t7�j ��D���_���j dz��dd��_�����_ ��!���_"��#���_$�jdkrk ����} �jdkr��%��} n��&��} | dkrn�j � | ���j�j �'���_(dS)Nr)r�r�r��or��r�srQrBrTrUr�c�,�g|]}t|����SrC)rj�rRr1s rUrVz0StataReader._read_old_header.<locals>.<listcomp>�s��J�J�J�!�s�1�v�v�J�J�JrWr��c�*��g|]}�j|��SrC)r��rRr�rs �rUrVz0StataReader._read_old_header.<locals>.<listcomp>�s ���C�C�C�C�T�]�3�/�C�C�CrW�,c�,�g|]}t|����SrC�rF�rRr�s rUrVz0StataReader._read_old_header.<locals>.<listcomp>�s��%>�%>�%>��c�!�f�f�%>�%>�%>rWr�r�c�*��g|]}�j|��SrC)r�r�s �rUrVz0StataReader._read_old_header.<locals>.<listcomp>�s ���E�E�E�c�d�n�S�1�E�E�ErWc�,�g|]}t|����SrCr�r�s rUrVz0StataReader._read_old_header.<locals>.<listcomp>�s��&?�&?�&?�!�s�1�v�v�&?�&?�&?rWzcannot convert stata dtypes [c�j��g|]/}���j�d������0S)r�r�r�s �rUrVz0StataReader._read_old_header.<locals>.<listcomp>�sC������=>�� � �T�.�3�3�B�7�7�8�8���rWc�j��g|]/}���j�d������0S)r\r�r�s �rUrVz0StataReader._read_old_header.<locals>.<listcomp>�sC������<=�� � �T�.�3�3�A�6�6�7�7���rWr^r�))rjr(rur_rNr)r,r4� _filetyper�rr5r`rarbrcrdrerfr�� frombufferr�r�r%rr�joinrsr5rvrGrwrxryrzr{r|r}r?r=�tellrn) rrLr��buf�typlistb�tpr�� invalid_types�invalid_dtypes� data_type�data_lens ` rUrKzStataReader._read_old_header�s(���"�:�a�=�1�1��� � �'J� J� J��^�2�2�4�;O�2�P�P�Q�Q� Q� ������!%���!2�!2�c�!9�!9�#�#�s������*�*��� ����q�!�!�!��&�&�(�(�� ��^�^�%�%�� ��/�/�1�1����/�/�1�1��� � �#� %� %�J�J�t�'8�'=�'=�d�j�'I�'I�J�J�J�G�G��#�(�(���4�4�C��}�S���9�9�9�H��G�� -� -����.�.�.��N�N�4�#8��#<�=�=�=�=��N�N�2��8�,�,�,�,� W�C�C�C�C�7�C�C�C�D�M�M��� W� W� W��H�H�%>�%>�g�%>�%>�%>�?�?�M��L�M�L�L�L�M�M�SV� V����� W���� Y�E�E�E�E�W�E�E�E�D�N�N��� Y� Y� Y� �X�X�&?�&?�w�&?�&?�&?�@�@�N��N�^�N�N�N�O�O�UX� X����� Y���� � �#� %� %�����BG�� �BS�BS����D�M�M�����AF�t�z�AR�AR����D�M��.�.�t�z�A�~�>�>�s��s�C�� ��)�)�+�+�� ��)�)�+�+�� � $� 9� 9� ;� ;��� � �#� %� %� 1� �O�O�-�-� ��'�#�-�-�#�/�/�1�1�H�H�#�/�/�1�1�H���>�>���!�&�&�x�0�0�0� 1�#�/�4�4�6�6����s0�G0�0 H2�:3H-�-H2�6I � J �3J�J rlc�j�|j�|jSg}t|j��D]o\}}||jvrDt t |��}|�d|��|j�|j|��f���R|�d|��d|��f���ptj |��|_|jS)z"Map between numpy and state dtypesN�sr|) rrrrr�r rFr%r4r�r�)r�dtypesr-r�s rUrzStataReader._setup_dtype�s��� �;� "��;� ����� �.�.� 4� 4�F�A�s��d�)�)�)��3��n�n��� � �w�1�w�w�4�?�(V�D�<O�PS�<T�(V�(V�W�X�X�X�X�� � �w�1�w�w� �C� � �2�3�3�3�3��h�v�&�&�� ��{�rWr�c�"�|�d��d} |�|j��S#t$rM|j}d|�d�}t j|t t�����|�d��cYSwxYw)Nr,rz@ One or more strings in the dta file could not be decoded using z�, and so the fallback encoding of latin-1 is being used. This can happen when a file has been incorrectly encoded by Stata or some other software. You should verify the string values returned are correct.r�r )� partitionr�r�UnicodeDecodeErrorr�r��UnicodeWarningr)rr�r �msgs rUr�zStataReader._decodes��� �K�K�� � �q� !�� '��8�8�D�N�+�+� +��!� '� '� '��~�H�+�@H�+�+�+�C� �M���+�-�-� � � � � �8�8�I�&�&� &� &� &� '���s�7�AB� Bc���|���|jrdS|jdkrd|_i|_dS|jdkr |j�|j��n?|j�J�|j|jj z}|j�|j |z��d|_i|_ |jdkr |j� d��dkr�n|j� d��}|s�n�|jdkr.|� |j� d����}n-|� |j� d����}|j� d ��|� ��}|� ��}tj|j� d|z��|j�d �|� ��}tj|j� d|z��|j�d �|� ��}tj|��}||}||}|j� |��} i|j|<t%|��D]O} | |d z kr || d zn|} |� | || | ���|j||| <�P|jdkr|j� d ����3d|_dS)Nr�TrO�s</valr�r�r�r�r��r�rDrBr�)rrr(�_value_label_dictr�rtrprrb�itemsizernrr�r9r�r�r4�argsortr5) rr:�slengthrr�txtlenr!r"�iirr-r�s rU�_read_value_labelszStataReader._read_value_labelss �� ������ � "� � �F� � �3� &� &�&*�D� #�BD�D� "� �F� � �3� &� &� � � "� "�4�#:� ;� ;� ;� ;��;�*�*�*��Z�$�+�"6�6�F� � � "� "�4�#6��#?� @� @� @�"&���!#���! *��#�s�*�*��$�)�)�!�,�,��8�8���'�,�,�Q�/�/�G�� ���#�s�*�*��,�,�t�'8�'=�'=�b�'A�'A�B�B����,�,�t�'8�'=�'=�c�'B�'B�C�C�� � � "� "�1� %� %� %��!�!�#�#�A��&�&�(�(�F��-��!�&�&�q�1�u�-�-���5K�5K�5K�ST����C��-��!�&�&�q�1�u�-�-���5K�5K�5K�ST����C���C���B��b�'�C��b�'�C��#�(�(��0�0�C�.0�D� "�7� +��1�X�X� � ��$%��A��I�I�c�!�a�%�j�j�6��:>�,�,���A��� �%�;�;��&�w�/��A��7�7��#�s�*�*��!�&�&�q�)�)�)�C! *�D#'����rWc���|j�|j��ddi|_ |j�d��dkrdS|jdkr|���}n�|j�d��}|jdkrd nd}|jd kr|d |�|d d|z �z}n|d |�|d |zd�z}tj d |��d }|� ��}|� ��}|j�|��}|dkr#|d d�� |j ��}nt|��}||jt|��<��S)N�0r�Tr�sGSOrOr�r'r�rUrr�r��r^)r�rtro�GSOrr(r;r4r�r�r0r9r�rrF)r�v_or��v_sizer��length�va� decoded_vas rU� _read_strlszStataReader._read_strlsSs��� ����t�/�0�0�0���9��� ,�� �%�%�a�(�(�F�2�2����#�s�*�*��'�'�)�)����'�,�,�R�0�0��"�2�c�9�9���q���?�c�)�)��a��h�-�#�a�2��;�.?�*@�@�C�C��a��h�-�#�q�6�z�n�n�*=�=�C��m�C��-�-�a�0���"�"�$�$�C��&�&�(�(�F��"�'�'��/�/�B��c�z�z���"��X�_�_�T�^�<�<� � �!��W�W� �!+�D�H�S��X�X� �3 ,rWr$c�F�d|_|�|j���S)NT��nrows)r�rr�r_s rU�__next__zStataReader.__next__rs ��#����y�y�t��y�/�/�/rW�sizec�@�|�|j}|�|���S)a Reads lines from Stata file and returns as dataframe Parameters ---------- size : int, defaults to None Number of lines to read. If None, reads whole file. Returns ------- DataFrame Nr�)r�r)rr�s rU� get_chunkzStataReader.get_chunkvs$�� �<��?�D��y�y�t�y�$�$�$rWr�� bool | Nonec �� ��|���|�|j}|�|j}|�|j}|�|j}|�|j}|�|j}|�|j}|�|j}|jdkr�|dkr�d|_ d|_ t|j ���} t| j��D]U\} } |j| } t!| t"j��r)| jdkr| | �| ��| | <�V|�|�| |��} | S|jdkr"|jsd|_ |���|j�J�|j} |j|jz | jz}|| jz}t9||��}|dkr|r|���t<�|j| jz}|j� |j!|z��t9||j|jz ��}t#j"|j�#|��| |���}|xj|z c_|j|jkrd|_ d|_ |j$|j%kr>|�&���'|j�(����}|r|���tS|��dkrt|j ���} n-tj*|��} tW|j ��| _|�"tY|j|z |j��| _-|�|�| |��} t]| |j/��D]=\} }t!|t`��r#| | �1|j2��| | <�>|�3| ��} d�t|j��D��}t#jth��}|D]a}| j5dd�|fj} | ||j|fvr8| �6|| j5dd�|f�| �����b|�7| |��} |rmt|j8��D]X\} �ts�fd�ttD����r3| �6| tw| j5dd�| f������Y|r-|jd kr"|�<| |j=|j>|��} |�sEg}d }| D�]} | | j} | t#jt"j?��t#jt"j@��fvr!t#jt"jA��} d}n{| t#jt"jB��t#jt"jC��t#jt"jD��fvr t#jt"jE��} d}|�F| | | �| ��f����|r!tjGt�|����} |�(| �I| �J|����} | S) NrT)r�r|rOr�c��g|] \}}|�|�� SrcrC)rRr-�dtyps rUrVz$StataReader.read.<locals>.<listcomp>�s!��W�W�W�g�a��d�FV��FV�FV�FVrWc3�B�K�|]}��|��V��dSrc)r�)rR�date_fmtrEs �rU� <genexpr>z#StataReader.read.<locals>.<genexpr>�s/�����N�N�H�s�~�~�h�/�/�N�N�N�N�N�NrWr�F)Krr�r�r�r�r�r�r�rbrrr$rvrr�rsr�r�r��charr��_do_select_columnsr(rr�rrr�rZr�� StopIterationr�rtrnr�rr4r �byteswapr�� newbyteorderr#� from_recordsr%r&rOr\rrrjr�r�� _insert_strlsr��iloc�isetitem�_do_convert_missingryr�� _date_formatsr��_do_convert_categoricalsr�r{�float16r�r�r�r�r�r�r%� from_dictr�� set_index�pop)rr�r�r�r�r�r�r�r�r�r-r��dtr�� max_read_len�read_lenr:� read_lines�raw_datar�� valid_dtypes� object_type�idx� retyped_data�convertrEs @rUrzStataReader.read�s��� ������ � � �/�M� � '�#'�#=� � � "�"�3�O� � "�"�3�O� �?��m�G� � %�!%�!9� � � ���I� �=��J�E� �J�!�O�O��!���*.�D� '�"�D�O��T�]�3�3�3�D�#�D�L�1�1� 9� 9���3��^�A�&���b�"�(�+�+�9��w�#�~�~�$(��I�$4�$4�R�$8�$8��S� ���"��.�.�t�W�=�=���K� � �C� '� '�$�2I� '�*.�D� '� � � � � � ��{�&�&�&�� ��� �T�%5�5���G� ��5�>�)���x��.�.�� �q�=�=�$� *��'�'�)�)�)�� ��!�E�N�2�� ����t�2�V�;�<�<�<���� �T�-=� =�>�>� ��=� � � "� "�8� ,� ,�E�� � � �� ���J�&��� � �t�z� )� )�*.�D� '�"�D�O� �?�d�4� 4� 4��(�(�*�*�/�/���0K�0K�0M�0M�N�N�H� � &� � #� #� %� %� %� �x�=�=�A� � ��T�]�3�3�3�D�D��)�(�3�3�D� ���/�/�D�L� � �#�� �:�-�t�/?���D�J� � ��*�*�4��9�9�D��D�$�-�0�0� :� :�H�C���#�s�#�#� :� ��I�O�O�D�L�9�9��S� ���!�!�$�'�'��X�W��4�>�)B�)B�W�W�W� ��h�v�&�&� �� D� D�C��I�a�a�a��f�%�+�E��[�$�.��*=�>�>�>�� � �c�4�9�Q�Q�Q��V�#4�#;�#;�E�#B�#B�C�C�C���'�'��o�>�>�� � �#�D�M�2�2� � ���3��N�N�N�N� �N�N�N�N�N���M�M��>�t�y����A���PS�T�T����� � �D�$8�3�$>�$>��0�0��d�,�d�m�=O���D�� ?��L��G�� D� D���S� ����R�X�b�j�1�1�2�8�B�J�3G�3G�H�H�H��H�R�Z�0�0�E�"�G�G���H�R�W�%�%��H�R�X�&�&��H�R�X�&�&���� �H�R�X�.�.�E�"�G��#�#�S�$�s�)�*:�*:�5�*A�*A�$B�C�C�C�C�� ?� �*�4� �+=�+=�>�>�� � ��>�>�$�(�(�9�"5�"5�6�6�D�� rWr�c���i}tt|j����D�]�}|j|}||jvr�t t |��}|j|\}}|jdd�|f}|j} | |k| |kz} | � ��s�y|r�tj tj | ����d} tj || d���\} } t|t���}t!| ��D]*\}}t#|��}| | |k}||j|<�+nx|j}|tjtjfvr tj}t||���}|jjds|���}tj|j| <|||<���|r0|���D]\}}|�||���|S)NrT)�return_inverser�� WRITEABLE)r5r#r�rrr�r rFr�r�r�r��nonzero�asarray�uniquer'r�rr�r�r�r��flagsr��nanrKr�)rr�r�� replacementsr-rE�nmin�nmax�series�svals�missing� missing_loc�umissing� umissing_loc� replacement�j�umr�r�r�r rks rUr�zStataReader._do_convert_missings��� ��s�4�<�(�(�)�)�% *�% *�A��-��"�C��$�*�*�*���s�C�.�.�C��)�#�.�J�D�$��Y�q�q�q�!�t�_�F��N�E��t�|��� �5�G��;�;�=�=� ��� 6� �j���G�)<�)<�=�=�a�@� �)+��6�'�?�SW�)X�)X�)X�&��,�$�V�6�:�:�:� �&�x�0�0�:�:�E�A�r�$5�b�$9�$9�M�%�l�a�&7�8�C�,9�K�$�S�)�)� :� � �����R�Z� 8�8�8��J�E�$�V�5�9�9�9� �"�*�0��=�5�#.�"2�"2�"4�"4�K�02�v� �#�G�,�)�L��O�O� � *�*�0�0�2�2� *� *� ��U�� � �c�5�)�)�)�)�� rWc���t�d��rt�j��dkr|St�j��D]=\}}|dkr� |�|�fd�|jdd�|fD�����>|S)Nr�rr�c�D��g|]}�jt|����SrC)r�rF)rR�krs �rUrVz-StataReader._insert_strls.<locals>.<listcomp>Ts&���H�H�H�1�d�h�s�1�v�v�.�H�H�HrW)r r#r�rrrr�r�)rr�r-r�s` rUr�zStataReader._insert_strlsMs�����t�U�#�#� �s�4�8�}�}��'9�'9��K��� �.�.� J� J�F�A�s��c�z�z�� �M�M�!�H�H�H�H�� �!�!�!�Q�$��H�H�H� I� I� I� I�� rW� Sequence[str]c���|j�sXt|��}t|��t|��krtd���|�|j��}|r4d�t|����}td|�����g}g}g}g} |D]�} |j�| ��} |� |j | ��|� |j | ��|� |j | ��| � |j | ����||_ ||_ ||_ | |_ d|_||S)Nz"columns contains duplicate entriesz, z<The following columns were not found in the Stata data set: T)r�setr#ru� differencer�r�r��get_locr%rsrrryr{) rr�r�� column_set� unmatched�joinedr�r��fmtlist�lbllistr�r-s rUr�zStataReader._do_select_columnsWse���(� -��W���J��:���#�g�,�,�.�.� �!E�F�F�F�"�-�-�d�l�;�;�I�� ����4� �?�?�3�3�� �=�4:�=�=���� �H��G��G��G�� 1� 1���L�(�(��-�-�������q� 1�2�2�2����t�}�Q�/�0�0�0����t�}�Q�/�0�0�0����t�}�Q�/�0�0�0�0�%�D�N�#�D�M�#�D�M�#�D�M�(,�D� %��G�}�rW�value_label_dict�dict[str, dict[float, str]]r/c��|s|Sg}t||��D�]\}}||v�r�||}tjt|�������} ||} | �| ��} |jr| ���r| } n6|jr-tj ttt�����d} t| | |���} | �Bg}| jD]7}||vr|�||���"|�|���8n!t|�����} | �|��} n�#t$$rw}t'|d������}t|j|dk��}dd�|��z}d |�d |�d�}t%|��|�d}~wwxYwt'| |jd� ��}|�||f�����|�|||f����t/t1|��d���}|S) zC Converts categorical columns to Categorical type. r�N)r�orderedF)r�rBzQ-------------------------------------------------------------------------------- r4z Value labels for column a are not unique. These cannot be converted to pandas categoricals. Either read the file with `convert_categoricals` set to False or use the low level interface in `StataReader` to separately read the values and the value_labels. The repeated labels are: r�)r\r�r r��keys�isinr��allr�r�r�rrrrr%rw�rename_categoriesrur'� value_countsrOr�r$r�)rr�r0r/r��cat_converted_datar��labelr'r4�column� key_matches�initial_categories�cat_datarr(r��vc� repeated_cats�repeatsr�� cat_seriess rUrz$StataReader._do_convert_categoricalsws��� � ��K����d�G�,�,�= <�= <�J�C���(�(�(�%�e�,���x��R�W�W�Y�Y���0�0���c���$�k�k�$�/�/� ��'�.�K�O�O�,=�,=�.�<@�&�&� �+�� � �:�8�'7�'9�'9����� *.�&�&��'9�CU�����&�-�!#�J�$,�$7�8�8��#�r�>�>�&�-�-�b��l�;�;�;�;�&�-�-�h�7�7�7�7� 8�"&�b�i�i�k�k�!2�!2�J�3� (�9�9�*�E�E�H�H��!�3�3�3�� ��7�7�7�D�D�F�F�B�$(���"�q�&�)9�$:�$:�M�-�� � �-�0H�0H�H�G� �� � � � � � �C�%�S�/�/�s�2�����!3����$$�H�D�J�U�K�K�K� �"�)�)�3� �*;�<�<�<�<�"�)�)�3��S� �*:�;�;�;�;���0�1�1��>�>�>��� s�E� G�&A2G�Gc�8�|���|jS)a� Return data label of Stata file. Examples -------- >>> df = pd.DataFrame([(1,)], columns=["variable"]) >>> time_stamp = pd.Timestamp(2000, 2, 29, 14, 21) >>> data_label = "This is a data file." >>> path = "/My_path/filename.dta" >>> df.to_stata(path, time_stamp=time_stamp, # doctest: +SKIP ... data_label=data_label, # doctest: +SKIP ... version=None) # doctest: +SKIP >>> with pd.io.stata.StataReader(path) as reader: # doctest: +SKIP ... print(reader.data_label) # doctest: +SKIP This is a data file. )rrdr_s rU� data_labelzStataReader.data_label�s��$ ��������rWc�8�|���|jS)z2 Return time stamp of Stata file. )rrfr_s rU� time_stampzStataReader.time_stamp�s�� ��������rW�dict[str, str]c�x�|���tt|j|j����S)a� Return a dict associating each variable name with corresponding label. Returns ------- dict Examples -------- >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["col_1", "col_2"]) >>> time_stamp = pd.Timestamp(2000, 2, 29, 14, 21) >>> path = "/My_path/filename.dta" >>> variable_labels = {"col_1": "This is an example"} >>> df.to_stata(path, time_stamp=time_stamp, # doctest: +SKIP ... variable_labels=variable_labels, version=None) # doctest: +SKIP >>> with pd.io.stata.StataReader(path) as reader: # doctest: +SKIP ... print(reader.variable_labels()) # doctest: +SKIP {'index': '', 'col_1': 'This is an example', 'col_2': ''} >>> pd.read_stata(path) # doctest: +SKIP index col_1 col_2 0 0 1 2 1 1 3 4 )rr�r\rvr}r_s rU�variable_labelszStataReader.variable_labels�s3��0 �������C�� �t�'<�=�=�>�>�>rWc�F�|js|���|jS)aX Return a nested dict associating each variable name to its value and label. Returns ------- dict Examples -------- >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["col_1", "col_2"]) >>> time_stamp = pd.Timestamp(2000, 2, 29, 14, 21) >>> path = "/My_path/filename.dta" >>> value_labels = {"col_1": {3: "x"}} >>> df.to_stata(path, time_stamp=time_stamp, # doctest: +SKIP ... value_labels=value_labels, version=None) # doctest: +SKIP >>> with pd.io.stata.StataReader(path) as reader: # doctest: +SKIP ... print(reader.value_labels()) # doctest: +SKIP {'col_1': {3: 'x'}} >>> pd.read_stata(path) # doctest: +SKIP index col_1 col_2 0 0 1 2 1 1 x 4 )rr�r�r_s rUrzStataReader.value_labels�s*��0�&� &� � #� #� %� %� %��%�%rW) TTNFTNTNr�N)r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r.r7r�rGr r>)rGr1)rrrrr r!rGr )rGrj)rGr*)rDrjrGrE)r~rjrGr)rGr�rp)rLr*rGr )rGrl)r�r*rGrF)rGr$rc)r�r�rGr$)NNNNNNNN)r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�rGr$)r�r$r�r�rGr$�r�r$rGr$)r�r$r�r&rGr$) r�r$r0r1r/r&r�r�rGr$)rGrG)rGr1)6r?r@rA�_stata_reader_docrBrqrrr rr#rr)r,r0r5r9r;r=r?rArCrGrrJrqrurxrzr|rarcrerlrKrr�r�r�r�r�r�_read_method_docrr�r�r�rrvrDrFrIr� __classcell__�r s@rUr�r�\sq���������G����� #�%)� $� %� $�(,�#'� $�*1�15�/@�/@�/@�/@�/@�/@�/@�b��������>���� ��������$%�%�%�%�@�@�@�@�@�@�@�@�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�R�@�@�@�@� � � � � .�.�.�.�5<�5<�5<�5<�p!�!�!�!�(T�T�T�T� T� T� T� T�T�T�T�T� � � � �'�'�'�'� <� <� <� <� � � � � � � � �I7�I7�I7�I7�V���� '�'�'�'�*7'�7'�7'�7'�r,�,�,�,�>0�0�0�0�%�%�%�%�%�"�X����!�%)�,0� $�'+�'+�(,�*.�U�U�U�U� ��U�n,�,�,�,�\��������@L�L�L�L�\� � � ��X� �(� � � ��X� �?�?�?�?�6&�&�&�&�&�&�&�&rWr�TFr�) r�r�r�r�r�r�r�r��iteratorr�r7r�r�r�r�r�r�r�r�r�r�r�r�r�rPr�r.r��DataFrame | StataReaderc ��t|||||||||| | �� � } | s|r| S| 5| ���cddd��S#1swxYwYdS)N) r�r�r�r�r�r�r�r�r7r�)r�r) r6r�r�r�r�r�r�r�r�rPr�r7�readers rU� read_statarTs��� ��#�1��'�'��-��'�� � � �F���9��� � ����{�{�}�}���������������������s�A�A� A� endiannessc��|���dvrdS|���dvrdStd|�d����)N)rU�littlerU)rT�bigrTz Endianness r�)�lowerru)rUs rUrr@sT�������_�,�,��s� � � � � �|� +� +��s��B�z�B�B�B�C�C�CrWr�r r�rjc��t|t��r|d|t|��z zzS|d|t|��z zzS)zQ Take a char string and pads it with null bytes until it's length chars. r,�)r�r*r#�r�r�s rUr4r4IsL���$����5��g��#�d�)�)�!3�4�4�4� �&�F�S��Y�Y�.�/� /�/rWrlc�l�|dvrtjtj��Std|�d����)zK Convert from one of the stata date formats to a type in TYPE_MAP. )r~r8r�r:r�r<r�r=r�r>r�r?r�r@r�z not implemented)r�r�r��NotImplementedError)rEs rU�_convert_datetime_to_stata_typer_RsD�� ���� �x�� �#�#�#�!�"A�C�"A�"A�"A�B�B�BrWr��varlist�list[Hashable]c�R�i}|D]�}||�d��sd||z||<||vr1|�|�|��||i���`t|t��st d���|�|||i����|S)N�%z0convert_dates key must be a column or an integer)r��updaterOr�rjru)r�r`�new_dictrIs rU�_maybe_convert_to_int_keysrfks����H��7�7���S�!�,�,�S�1�1� :�!$�}�S�'9�!9�M�#� � �'�>�>� �O�O�W�]�]�3�/�/��s�1C�D� E� E� E� E��c�3�'�'� U� �!S�T�T�T� �O�O�S�-��"4�5� 6� 6� 6� 6� �OrWr�r;c��|jtjur1tt |j����}t |d��S|jtjurdS|jtjurdS|jtj urdS|jtj urdS|jtj urdStd|�d����) a� Convert dtype types to stata types. Returns the byte of the given ordinal. See TYPE_MAP and comments for an explanation. This is also explained in the dta spec. 1 - 244 are strings of this length Pandas Stata 251 - for int8 byte 252 - for int16 int 253 - for int32 long 254 - for float32 float 255 - for double double If there are dates to convert, then dtype will already have the correct type inserted. rBr�r�r�rr~� Data type � not supported.� rgr��object_rrr�rYr�r�r�r�r�r^)r�r;r�s rU�_dtype_to_stata_typerlys���" �z�R�Z���(� �f�n�(E�(E�F�F���8�Q���� ��r�z� !� !��s� ��r�z� !� !��s� ��r�x� � ��s� ��r�x� � ��s� ��r�w� � ��s�!�"E�u�"E�"E�"E�F�F�FrWr�� dta_version� force_strlc� �|dkrd}nd}|rdS|jtjur~tt |j����}||kr4|dkrdSt t�|j �����dtt|d����zdzS|tj krdS|tj krd S|tjkrd S|tjtjfvrd St#d |�d ����)a� Map numpy dtype to stata's default format for this type. Not terribly important since users can change this in Stata. Semantics are object -> "%DDs" where DD is the length of the string. If not a string, raise ValueError float64 -> "%10.0g" float32 -> "%9.0g" int64 -> "%9.0g" int32 -> "%12.0g" int16 -> "%8.0g" int8 -> "%8.0g" strl -> "%9s" rO��r�z%9srcrBr�z%10.0gz%9.0gz%12.0gz%8.0grhri)rgr�rkrrr�rur�rNr�rFrYr�r�r�r�r�r^)r�r;rmrn� max_str_lenr�s rU�_dtype_to_default_stata_fmtrr�s��&�S���� � �� � � ��5� �z�R�Z���'� �f�n�(E�(E�F�F�� �k� !� !��c�!�!��u� �!>�!E�!E�f�k�!R�!R�S�S�S��S��X�q�)�)�*�*�*�S�0�0� �"�*� � ��x� �"�*� � ��w� �"�(� � ��x� �2�7�B�H�%� %� %��w�!�"E�u�"E�"E�"E�F�F�FrW�compression_options�fname)r7rsc�^��eZdZUdZdZdZded< dKdd �dL�fd �ZdMd#�ZdNd&�Z dOd(�Z dPd)�Z dPd*�Z dQd+�Z dRd-�ZdPd.�ZdSd1�ZdTd2�ZdQd3�ZdQd4�ZdQd5�ZdQd6�ZdQd7�ZdQd8�ZdQd9�ZdQd:�ZdQd;�Z dUdVd<�ZdQd=�ZdQd>�ZdQd?�ZdQd@�ZdQdA�Z dQdB�Z!dPdC�Z"dWdE�Z#dXdG�Z$e%dYdI���Z&dZdJ�Z'�xZ(S)[� StataWriterar A class for writing Stata binary dta files Parameters ---------- fname : path (string), buffer or path object string, path object (pathlib.Path or py._path.local.LocalPath) or object implementing a binary write() functions. If using a buffer then the buffer will not be automatically closed after the file is written. data : DataFrame Input to save convert_dates : dict Dictionary mapping columns containing datetime types to stata internal format to use when writing the dates. Options are 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. Datetime columns that do not have a conversion type specified will be converted to 'tc'. Raises NotImplementedError if a datetime column has timezone information write_index : bool Write the index to Stata dataset. byteorder : str Can be ">", "<", "little", or "big". default is `sys.byteorder` time_stamp : datetime A datetime to use as file creation date. Default is the current time data_label : str A label for the data set. Must be 80 characters or smaller. variable_labels : dict Dictionary containing columns as keys and variable labels as values. Each label must be 80 characters or smaller. {compression_options} .. versionchanged:: 1.4.0 Zstandard support. {storage_options} value_labels : dict of dicts Dictionary containing columns as keys and dictionaries of column value to labels as values. The combined length of all labels for a single variable must be 32,000 characters or smaller. .. versionadded:: 1.4.0 Returns ------- writer : StataWriter instance The StataWriter instance has a write_file method, which will write the file to the given `fname`. Raises ------ NotImplementedError * If datetimes contain timezone information ValueError * Columns listed in convert_dates are neither datetime64[ns] or datetime * Column dtype is not representable in Stata * Column listed in convert_dates is not in DataFrame * Categorical label contains more than 32,000 characters Examples -------- >>> data = pd.DataFrame([[1.0, 1]], columns=['a', 'b']) >>> writer = StataWriter('./data_file.dta', data) >>> writer.write_file() Directly write a zip file >>> compression = {{"method": "zip", "archive_name": "data_file.dta"}} >>> writer = StataWriter('./data_file.zip', data, compression=compression) >>> writer.write_file() Save a DataFrame with dates >>> from datetime import datetime >>> data = pd.DataFrame([[datetime(2000,1,1)]], columns=['date']) >>> writer = StataWriter('./date_data_file.dta', data, {{'date' : 'tw'}}) >>> writer.write_file() rpr r rNTr��rrt�FilePath | WriteBuffer[bytes]r�r$r��dict[Hashable, str] | None� write_indexr�r)r�rF�datetime | NonerDrIr�r.r7r�r�'dict[Hashable, dict[float, str]] | NonerGr c ���t�����||_|�in||_||_||_||_||_| |_g|_ tj gt���|_ | |_d|_i|_|�|��| |_|� t&j}t+|��|_||_tjtjtjd�|_dS)Nr�)r�rr~)r�rr�r�� _write_indexrfrdr}�_non_cat_value_labels� _value_labelsr�r r��_has_value_labelsr�� _output_file�_converted_names�_prepare_pandasr7rr)rr4�_fnamer�r�r��type_converters) rrtr�r�rzr)rFrDrIr�r7rr s �rUrzStataWriter.__init__ s���� ���������� �$1�$9�b�b�}���'���%���%��� /���%1��"�46���!#��"�D�!9�!9�!9���'���.2���57��� ���T�"�"�"�.��� � �� �I�)�)�4�4����� �%'�X�B�H�2�7�K�K����rW�to_writerFc�t�|jj�|�|j����dS)zS Helper to call encode before writing to file for Python 3 compat. N)rrr2r$r)rr�s rU�_writezStataWriter._writeF s1�� � ��!�!�(�/�/�$�.�"A�"A�B�B�B�B�BrWrkr*c�D�|jj�|��dS)z? Helper to assert file is open before writing. N)rrr2r]s rU� _write_byteszStataWriter._write_bytesL s#�� � ��!�!�%�(�(�(�(�(rW�list[StataNonCatValueLabel]c��g}|j�|S|j���D]�\}}||jvr|j|}n,||jvrt |��}nt d|�d����t ||j��std|�d����t|||j ��}|� |����|S)zc Check for value labels provided for non-categorical columns. Value labels NzCan't create value labels for z!, it wasn't found in the dataset.z6, value labels can only be applied to numeric columns.) rrKr�r�rFr�rr�rurErr%)rr��non_cat_value_labelsr�labels�colname�svls rU�_prepare_non_cat_value_labelsz)StataWriter._prepare_non_cat_value_labelsR s��=?�� � %� -�'� '�#�9�?�?�A�A� -� -�O�G�V��$�/�/�/��/��8����D�L�(�(��g�,�,����,�W�,�,�,���� $�D��M�$7�8�8� �!�>�W�>�>�>����(�����H�H�C� � '� '�� ,� ,� ,� ,�#�#rWc�>�d�|jD��}t|��s|S|xjtj|��zc_t j}g}t||��D�]�\}}|�rst|||j ���}|j � |��||j j j}|tjkrt!d���||j j j���} | ���||��kr�|tjkrtjtj��}nM|tjkrtjtj��}ntjtj��}tj| |���} ||��| | dk<|� || f����||� |||f�����t1jt5|����S)z� Check for categorical columns, retain categorical information for Stata file and convert categorical data to int c�8�g|]}t|t����SrC)r�r)rRr�s rUrVz5StataWriter._prepare_categoricals.<locals>.<listcomp>x s#��O�O�O�%�*�U�$4�5�5�O�O�OrW)r zCIt is not possible to export int64-based categorical data to Stata.r�r^)r�r�r�r�r r�ror\rrr�r%r�codesr�r�rur�r�rYr�r�r�r�r$rr�) rr��is_catro�data_formattedr�� col_is_catr�r�rws rU�_prepare_categoricalsz!StataWriter._prepare_categoricalss s��� P�O�4�;�O�O�O���6�{�{� ��K� ���"�(�6�"2�"2�2���!2�!I����"�4��0�0� 8� 8�O�C��� 8�%�d�3�i�$�.�I�I�I���"�)�)�#�.�.�.��S� � �+�1���B�H�$�$�$�A�����c���,�4�9�9�;�;���:�:�<�<�#9�#9�%�#@�#@�@�@����'�'� "���� 2� 2����"�(�*�*� "���� 2� 2��� "���� 4� 4���X�f�E�:�:�:�F�(>�'=�e�'D�'D��v��|�$��%�%�s�F�m�4�4�4�4��%�%�s�D��I�&6�7�7�7�7��"�4��#7�#7�8�8�8rWc���|D]r}||j}|tjtjfvrI|tjkr|jd}n |jd}||�|��||<�s|S)z� Checks floating point data columns for nans, and replaces these with the generic Stata for missing value (.) r�rd)r�r�r�r�rMr�)rr�r1r�r s rU� _replace_nanszStataWriter._replace_nans� sz�� � 6� 6�A���G�M�E����R�Z�0�0�0��B�J�&�&�"&�"5�c�":�K�K�"&�"5�c�":�K��q�'�.�.��5�5��Q���� rWc��dS)zNo-op, forward compatibilityNrCr_s rU�_update_strl_nameszStataWriter._update_strl_names� ����rWr�c��|D]B}|dks|dkr4|dks|dkr(|dks|dkr|dkr|�|d��}�C|S)a� Validate variable names for Stata export. Parameters ---------- name : str Variable name Returns ------- str The validated name with invalid characters replaced with underscores. Notes ----- Stata 114 and 117 support ascii characters in a-z, A-Z, 0-9 and _. �A�Z�a�zr��9r�)�replace�rr�r1s rU�_validate_variable_namez#StataWriter._validate_variable_name� sh��(� ,� ,�A��S���A��G�G���W�W��C�����W�W��C�����H�H��|�|�A�s�+�+���� rWc��i}t|j��}|dd�}d}t|��D�]\}}|}t|t��st |��}|�|��}||jvrd|z}d|dcxkrdkrnnd|z}|dtt|��d���}||ksv|� |��dkrXdt |��z|z}|dtt|��d���}|dz }|� |��dk�X|||<|||<��t|��|_|j r9t||��D](\} } | | kr|j | |j | <|j | =�)|r�g} |� ��D]!\}}|�d|��} | �| ���"t�d �| ����} t%j| t(t+��� ��||_|���|S) a� Checks column names to ensure that they are valid Stata column names. This includes checks for: * Non-string names * Stata keywords * Variables that start with numbers * Variables with names that are too long When an illegal variable name is detected, it is converted, and if dates are exported, the variable name is propagated to the date conversion dictionary Nrr�r�r�r.rBz -> z r�)r�r�rr�rFr�r�rZr#rDr%r�r\rKr%r�rNr�r�r�rrr�r�)rr��converted_namesr��original_columns�duplicate_var_idr!r�� orig_namer1�o�conversion_warningr�r�s rU�_check_column_nameszStataWriter._check_column_names� s���02���t�|�$�$��"�1�1�1�:���� ��)�)� � �G�A�t��I��d�C�(�(� !��4�y�y���/�/��5�5�D��t�*�*�*��T�z���d�1�g�$�$�$�$��$�$�$�$�$��T�z���,�#�c�$�i�i��,�,�,�-�D��9�$�$��m�m�D�)�)�A�-�-���%5�!6�!6�6��=�D�� 4�#�c�$�i�i��"4�"4� 4�5�D�$��)�$� �m�m�D�)�)�A�-�-� .2�� �*��G�A�J�J��W�~�~�� � � � /��G�%5�6�6� /� /���1���6�6�-1�-@��-C�D�'��*��+�A�.�� � �!#� �#2�#8�#8�#:�#:� /� /�� �4�"�2�2�D�2�2��"�)�)�#�.�.�.�.�!�(�(����7I�)J�)J�K�K�B� �M��!�+�-�-� � � � � !0��� ���!�!�!�� rWr�r'c�"�g|_g|_|���D]k\}}|j�t ||j|����|j�t ||j|�����ldSrc)r.r�rKr%rrr�rl)rr�r�r�s rU�_set_formats_and_typesz"StataWriter._set_formats_and_types s���"$�� �"$�� � �,�,�.�.� M� M�J�C�� �L� � � ;�E�4�9�S�>� R� R� S� S� S� �L� � � 4�U�D�I�c�N� K� K� L� L� L� L� M� MrWc��|���}|jr+|���}t|t��r|}|�|��}t |��}|�|��}tj d|j d��|_ |� |��}d�|D��}|j �|��}|xj |zc_ |j�|��|�|��}|j \|_|_||_|j ���|_|j}|D]6}||jvr� t3j||jd��r d|j|<�7t9|j|j��|_|jD]8}t;|j|��} tj| ��|j|<�9|���|� |��|j�4|jD].}t|tB��r|j||j"|<�-dSdS)NFrBc��g|] }|j�� SrC)r)rRr�s rUrVz/StataWriter._prepare_pandas.<locals>.<listcomp>: s��G�G�G�3�3�;�G�G�GrWr�r~)#r�r~� reset_indexr�r$r�rr�r��repeatr�r�r�r�r5r��extendr��nobs�nvarr��tolistr`r�r�rr�r�rfr_r��_encode_stringsr�rjr.) rr��tempr��non_cat_columns�has_non_cat_val_labelsr�r�rI�new_types rUr�zStataWriter._prepare_pandas sy���y�y�{�{�� � � ��#�#�%�%�D��$� �*�*� ����'�'��-�-��$�D�)�)���!�!�$�'�'��"$��5�$�*�Q�-�!@�!@��� $�A�A�$�G�G��G�G�2F�G�G�G��!%��!2�!2�?�!C�!C�� ���"8�8��� ��!�!�"6�7�7�7��)�)�$�/�/��#�z��� �4�9��� ��|�*�*�,�,�� ����� 0� 0�C��d�)�)�)����t�C�y���4�4� 0�+/��#�C�(��8� � ��� � ����&� 2� 2�C�6�t�7J�3�7O�P�P�H�!�x��1�1�F�K�� � � ������ �#�#�F�+�+�+� � � *��*� A� A���c�3�'�'�A�(,�(;�C�(@�D�L��%�� +� *� A� ArWc��|j}t|dg��}t|j��D]�\}}||vs||vr�|j|}|j}|jt jur�t|d���}|dks-t|��dks|j }td|�d����|j|j � |j��}tt!|j����|jkr ||j|<��dS) z� Encode strings in dta-specific encoding Do not encode columns marked for date conversion or for strL conversion. The strL converter independently handles conversion and also accepts empty string arrays. � _convert_strlTr�r`rzColumn `a` cannot be exported. Only string-like object arrays containing all strings or a mix of strings and None can be exported. Object arrays containing only null values are prohibited. Other object types cannot be exported and must first be converted to one of the supported types.N)r�r[rr�r�rgr�rkrr#r�rurFr$rrrr��_max_string_length) rr�� convert_strlr-r�r;r��inferred_dtype�encodeds rUr�zStataWriter._encode_stringsa s/���+� ��t�_�b�9�9� ��� �*�*� -� -�F�A�s��M�!�!�S�L�%8�%8���Y�s�^�F��L�E��z�R�Z�'�'�!,�V�D�!A�!A�!A��'�8�3�3��F� � �q�8H�8H� �+�C�$�� ��������)�C�.�,�3�3�D�N�C�C��)��w��)G�)G�H�H��.�/�/�&-�D�I�c�N��1 -� -rWc ���t|jd|jd|j���5|_|jjd�S|jjt��c|_|j_|jj � |jj�� |� |j |j ���|���|���|���|���|���|���|���|���|���|���}|�|��|���|���|���|���|���n�#t:$r�}|j���t?|jt@tBj"f��r�tBj#�$|j��r\ tCj%|j��nA#tL$r4tOj(d|j�d�tRtU��� ��YnwxYw|�d}~wwxYw ddd��dS#1swxYwYdS) a Export DataFrame object to Stata dta format. Examples -------- >>> df = pd.DataFrame({"fully_labelled": [1, 2, 3, 3, 1], ... "partially_labelled": [1.0, 2.0, np.nan, 9.0, np.nan], ... "Y": [7, 7, 9, 8, 10], ... "Z": pd.Categorical(["j", "k", "l", "k", "j"]), ... }) >>> path = "/My_path/filename.dta" >>> labels = {"fully_labelled": {1: "one", 2: "two", 3: "three"}, ... "partially_labelled": {1.0: "one", 2.0: "two"}, ... } >>> writer = pd.io.stata.StataWriter(path, ... df, ... value_labels=labels) # doctest: +SKIP >>> writer.write_file() # doctest: +SKIP >>> df = pd.read_stata(path) # doctest: +SKIP >>> df # doctest: +SKIP index fully_labelled partially_labeled Y Z 0 0 one one 7 j 1 1 two two 7 k 2 2 three NaN 9 l 3 3 three 9.0 8 k 4 4 one NaN 10 j �wbF)r�rr7�methodN)rDrFz!This save was not successful but z. could not be deleted. This file is not valid.r�)+r)r�r�r7rr�rrr��created_handlesr%� _write_headerrdrf� _write_map�_write_variable_types�_write_varnames�_write_sortlist�_write_formats�_write_value_label_names�_write_variable_labels�_write_expansion_fields�_write_characteristics� _prepare_data� _write_data� _write_strls�_write_value_labels�_write_file_close_tag�_close� Exceptionrr�rF�os�PathLike�path�isfile�unlink�OSErrorr�r�rr)r�records�excs rU� write_filezStataWriter.write_file� s%��8� �K� ��)�� �0�  � � �/ � �\��|�'��1�=�:>��9L�g�i�i�6��!�4�<�#6�� �,�3�3�D�L�4G�H�H�H�" ��"�"�#�/�D�<L�#�������!�!�!��*�*�,�,�,��$�$�&�&�&��$�$�&�&�&��#�#�%�%�%��-�-�/�/�/��+�+�-�-�-��,�,�.�.�.��+�+�-�-�-��,�,�.�.��� � ��)�)�)��!�!�#�#�#��(�(�*�*�*��*�*�,�,�,����!�!�!�� � � � � � ��� � � �� �"�"�$�$�$��d�k�C���+=�>�>� �2�7�>�>��K�D�D� ��� �$�+�.�.�.�.��"���� � �B�� �B�B�B�+�'7�'9�'9� ����������� ����� �����A/ �/ �/ �/ �/ �/ �/ �/ �/ �/ �/ �/ ����/ �/ �/ �/ �/ �/ s\�A+K�E"G3�2K�3 K�=A#J>�!I;�:J>�;;J9�6J>�8J9�9J>�>K�K�K�Kc���|j�rt|jjt��sJ�|jj|jc}|j_|jj�|�����dSdS)z� Close the file if it was created by the writer. If a buffer or file-like object was passed in, for example a GzipFile, then leave this file open for the caller to close. N)r�r�rrrr2r6)rr7s rUr�zStataWriter._close� sp�� � � (��d�l�1�7�;�;� ;� ;�;�'+�|�':�D�<M� $�C���$� �L� � %� %�c�l�l�n�n� 5� 5� 5� 5� 5� )� (rWc��dS��No-op, future compatibilityNrCr_s rUr�zStataWriter._write_map� r�rWc��dSr�rCr_s rUr�z!StataWriter._write_file_close_tag� r�rWc��dSr�rCr_s rUr�z"StataWriter._write_characteristics� r�rWc��dSr�rCr_s rUr�zStataWriter._write_strls� r�rWc�L�|�tdd����dS)z"Write 5 zeros for expansion fieldsr�r�N)r�r4r_s rUr�z#StataWriter._write_expansion_fields� s$�� � � �J�r�1�%�%�&�&�&�&�&rWc�t�|jD]/}|�|�|j�����0dSrc)r�r�r<r4)rr's rUr�zStataWriter._write_value_labels� sI���$� H� H�B� � � �b�5�5�d�o�F�F� G� G� G� G� H� HrWc �0�|j}|�tjdd����|�|dkrdpd��|�d��|�d��|�tj|dz|j��dd���|�tj|d z|j��dd ���|�7|�|�td d ������n>|�|�t|dd �d ������|�tj ��}n$t|t��std ���gd�}d�t|��D��}|�d��||jz|�d��z}|�|�|����dS)Nrrr�rT��r[r�r�r-r�r��P�"time_stamp should be datetime type� �Jan�Feb�Mar�Apr�May�Jun�Jul�Aug�Sep�Oct�Nov�Decc� �i|] \}}|dz|�� SrQrC�rRr-r^s rU� <dictcomp>z-StataWriter._write_header.<locals>.<dictcomp># �"��G�G�G���E��A��u�G�G�GrW�%d � %Y %H:%M)r4r�r�r3r�r�r��_null_terminate_bytesr4r�nowr�rur�strftimer^)rrDrFr)�months� month_lookup�tss rUr�zStataWriter._write_header� s �� �O� � ���&�+�c�3�/�/�0�0�0� � � �I��$�/��9�6�:�:�:� � � �F���� � � �F���� ���&�+�i�#�o�t�y�A�A�"�1�"�E�F�F�F� ���&�+�i�#�o�t�y�A�A�"�1�"�E�F�F�F� � � � � �d�8�8��B��9K�9K�L�L� M� M� M� M� � � ��*�*�:�j��"��o�r�+J�+J�K�K� � � � � �!����J�J��J��1�1� C��A�B�B� B�  �  �  ��H�G�Y�v�5F�5F�G�G�G� � � � �� &� &��:�+�,� -��!�!�+�.�.� /� � ���$�4�4�R�8�8�9�9�9�9�9rWc�j�|jD]*}|�tjd|�����+dS)Nr/)r�r�r�r3)rr�s rUr�z!StataWriter._write_variable_types+ s@���<� 5� 5�C� � � �f�k�#�s�3�3� 4� 4� 4� 4� 5� 5rWc��|jD]D}|�|��}t|dd�d��}|�|���EdS)Nr.r�)r`�_null_terminate_strr4r�)rr�s rUr�zStataWriter._write_varnames/ s^���L� � �D��+�+�D�1�1�D��d�3�B�3�i��,�,�D� �K�K�� � � � � � rWc�f�tdd|jdzz��}|�|��dS)Nr�r�rB)r4r�r�)r�srtlists rUr�zStataWriter._write_sortlist7 s4���R��d�i�!�m�!4�5�5�� � � �G�����rWc�`�|jD]%}|�t|d�����&dS)Nr�)r.r�r4)rrEs rUr�zStataWriter._write_formats< s<���<� -� -�C� �K�K� �3��+�+� ,� ,� ,� ,� -� -rWc�4�t|j��D]�}|j|rP|j|}|�|��}t |dd�d��}|�|���_|�t dd������dS)Nr.r�r�)r5r�r�r`r r4r�)rr-r�s rUr�z$StataWriter._write_value_label_namesA s����t�y�!�!� 0� 0�A��%�a�(� 0��|�A����/�/��5�5��!�$�s��s�)�R�0�0��� � �D�!�!�!�!�� � �J�r�2�.�.�/�/�/�/� 0� 0rWc���tdd��}|j�.t|j��D]}|�|���dS|jD]�}||jvr}|j|}t |��dkrtd���td�|D����}|std���|�t|d������|�|����dS)Nr�r�r��.Variable labels must be 80 characters or fewerc3�<K�|]}t|��dkV��dS)�N)�ordr�s rUr�z5StataWriter._write_variable_labels.<locals>.<genexpr>[ s,����<�<���A���� �<�<�<�<�<�<rWzKVariable labels must contain only characters that can be encoded in Latin-1) r4r}r5r�r�r�r#rur6)r�blankr-r�r:� is_latin1s rUr�z"StataWriter._write_variable_labelsM s���2�r�"�"�� � � (��4�9�%�%� #� #��� � �E�"�"�"�"� �F��9� #� #�C��d�+�+�+��-�c�2���u�:�:��?�?�$�%U�V�V�V��<�<�e�<�<�<�<�<� � ��$�4����� � �J�u�b�1�1�2�2�2�2�� � �E�"�"�"�"� #� #rWc��|S)r�rC)rr�s rU�_convert_strlszStataWriter._convert_strlse s��� rW�np.rec.recarrayc�V�|j}|j}|j}|j�=t|��D]-\}}||vr$t |||j|��||<�.|�|��}i}|jttj ��k}t|��D]�\}}||}||j kr�tj ��5tjddt���||�d��} ddd��n #1swxYwY| �t$|f���||<d|��} | ||<||�| ��||<��||j} |s| �|j��} | ||<��|�d|���S) N�ignorezDowncasting object dtype arrays)r(r�)�argsr|F)rO� column_dtypes)r�r�r�rr�r.rr4rrr)r�r��catch_warnings�filterwarningsr%r�r�r4r�r�r�� to_records) rr�r�r�r-r�r��native_byteorderr��dc�styper�s rUr�zStataWriter._prepare_datai s���y���,���+� � � � *�#�D�/�/� � ���3�� �%�%� >��S� �4�<��?�!�!�D��I���"�"�4�(�(�����?�o�c�m�.L�.L�L����o�o� $� $�F�A�s��!�*�C��d�-�-�-��,�.�.�.�.��+� �9�!.����� �c��)�)�"�-�-�B� .�.�.�.�.�.�.�.�.�.�.����.�.�.�.��H�H�Z�s�f�H�=�=��S� �!�C� � ��#��s� � ��I�,�,�U�3�3��S� � ��S� ���'�@�!�.�.�t��?�?�E�#��s� � ����U�&��A�A�As� 8D�D �D r�c�T�|�|�����dSrc)r��tobytes�rr�s rUr�zStataWriter._write_data� s&�� ���'�/�/�+�+�,�,�,�,�,rWr�c��|dz }|S)Nr[rC)r�s rUr zStataWriter._null_terminate_str� s�� �V� ���rWc�\�|�|���|j��Src)r r$r)rr�s rUrz!StataWriter._null_terminate_bytes� s&���'�'��*�*�1�1�$�.�A�A�ArW)NTNNNNr�N)rtrxr�r$r�ryrzr�r)r�rFr{rDr�rIryr�r.r7r�rr|rGr )r�rFrGr )rkr*rGr )r�r$rGr�rKr>�r�rFrGrF�r�r'rGr )r�r$rGr �NN�rDr�rFr{rGr )rGr)r�rrGr )r�rFrGrF)r�rFrGr*))r?r@rArBr�rrqrr�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�rr�r�� staticmethodr rrNrOs@rUrvrv�sT�������� L�L�\��-6�I�6�6�6�6� 59� � $�&*�!%�6:�*1�15�$L�AE�$L�$L�$L�$L�$L�$L�$L�$L�LC�C�C�C� )�)�)�)� $�$�$�$�B(9�(9�(9�(9�T����"+�+�+�+�����<G�G�G�G�RM�M�M�M�@A�@A�@A�@A�D#-�#-�#-�#-�JK�K�K�K�Z 6� 6� 6� 6�*�*�*�*�*�*�*�*�*�*�*�*�*�*�*�*�'�'�'�'�H�H�H�H� "&�&*�5:�5:�5:�5:�5:�n5�5�5�5��������� -�-�-�-� 0� 0� 0� 0�#�#�#�#�0����%B�%B�%B�%B�N-�-�-�-������\��B�B�B�B�B�B�B�BrWrvc��|rdS|jtjur;tt |j����}t |d��}|dkr|SdS|jtjurdS|jtjurdS|jtj urdS|jtj urdS|jtj urdStd |�d ����) a Converts dtype types to stata types. Returns the byte of the given ordinal. See TYPE_MAP and comments for an explanation. This is also explained in the dta spec. 1 - 2045 are strings of this length Pandas Stata 32768 - for object strL 65526 - for int8 byte 65527 - for int16 int 65528 - for int32 long 65529 - for float32 float 65530 - for double double If there are dates to convert, then dtype will already have the correct type inserted. r�rBr�r�r�r�r�r�rhrirj)r�r;rnr�s rU�_dtype_to_stata_type_117r0� s���$���u� �z�R�Z���(� �f�n�(E�(E�F�F���x��#�#�� �t� � ��O��u� ��r�z� !� !��u� ��r�z� !� !��u� ��r�x� � ��u� ��r�x� � ��u� ��r�w� � ��u�!�"E�u�"E�"E�"E�F�F�FrW� str | bytesr*c�|�t|t��rt|d��}|d|t|��z zzS)zU Takes a bytes instance and pads it with null bytes until it's length chars. rr,)r�rFr*r#r\s rU�_pad_bytes_newr3� s@���$����$��T�7�#�#�� �'�V�c�$�i�i�/�0� 0�0rWc�8�eZdZdZ ddd�Zdd�Zdd�Zdd�ZdS)�StataStrLWritera� Converter for Stata StrLs Stata StrLs map 8 byte values to strings which are stored using a dictionary-like format where strings are keyed to two values. Parameters ---------- df : DataFrame DataFrame to convert columns : Sequence[str] List of columns names to convert to StrL version : int, optional dta version. Currently supports 117, 118 and 119 byteorder : str, optional Can be ">", "<", "little", or "big". default is `sys.byteorder` Notes ----- Supports creation of the StrL block of a dta file for dta versions 117, 118 and 119. These differ in how the GSO is stored. 118 and 119 store the GSO lookup value as a uint32 and a uint64, while 117 uses two uint32s. 118 and 119 also encode all strings as unicode which is required by the format. 117 uses 'latin-1' a fixed width encoding that extends the 7-bit ascii table with an additional 128 characters. rON�dfr$r�r&rRrjr)r�rGr c�8�|dvrtd���||_||_||_ddi|_|� t j}t|��|_d}d}d|_ |dkr d }d}d |_ n |d krd }nd }ddd|z zz|_ ||_ ||_ dS)NrNz,Only dta versions 117, 118 and 119 supportedr��rrr8r�rrOr�r r'r�r�r�r) ru�_dta_verr6r�� _gso_tablerr)rr4r�_o_offet� _gso_o_type� _gso_v_type)rr6r�rRr)� gso_v_type� gso_o_type�o_sizes rUrzStataStrLWriter.__init__� s��� �/� )� )��K�L�L� L��� ������ ��v�,��� � �� �I�)�)�4�4���� �� � ��� �c�>�>��F��J�&�D�N�N� ��^�^��F�F��F��a�1�v�:�.�/�� �%���%����rWrI�tuple[int, int]c�&�|\}}||j|zzSrc)r;)rrIr�r�s rU� _convert_keyzStataStrLWriter._convert_key s�����1��4�=�1�$�$�$rW�,tuple[dict[str, tuple[int, int]], DataFrame]c�N��|j}|j}t|j���||j}�fd�|jD��}t j|jtj���}t|� ����D]o\}\}}t|��D]W\} \} } || } | �dn| } |� | d��} | �| dz|dzf} | || <|� | ��||| f<�X�pt|j��D]\}} |dd�|f|| <�||fS)a� Generates the GSO lookup table for the DataFrame Returns ------- gso_table : dict Ordered dictionary using the string found as keys and their lookup position (v,o) as values gso_df : DataFrame DataFrame where strl columns have been converted to (v,o) values Notes ----- Modifies the DataFrame in-place. The DataFrame returned encodes the (v,o) values as uint64s. The encoding depends on the dta version, and can be expressed as enc = v + o * 2 ** (o_size * 8) so that v is stored in the lower bits and o is in the upper bits. o_size is * 117: 4 * 118: 6 * 119: 5 c�>��g|]}|��|��f��SrCrX)rRr�r�s �rUrVz2StataStrLWriter.generate_table.<locals>.<listcomp>2 s*���G�G�G�3�c�7�=�=��-�-�.�G�G�GrWr�Nr�rB) r:r6r�r�r��emptyr�r�r�iterrows�getrC)r� gso_table�gso_df�selected� col_indexr4r�r �rowr!r�r�r"rIr-r�s @rU�generate_tablezStataStrLWriter.generate_table sV���:�O� �����v�~�&�&���$�,�'��G�G�G�G�$�,�G�G�G� ��x���b�i�8�8�8��&�x�'8�'8�':�':�;�;� 4� 4�M�A�z��S�(��3�3� 4� 4� ��8�C���#�h���K�b�b�S���m�m�C��.�.���;��q�5�!�a�%�.�C�%(�I�c�N�!�.�.�s�3�3��Q��T� � � 4� �� �-�-� %� %�F�A�s��q�q�q�!�t�*�F�3�K�K��&� � rWrJ�dict[str, tuple[int, int]]r*c �N�t��}tdd��}tj|jdzd��}tj|jdzd��}|j|jz}|j|jz}|jdz}|���D]�\} } | dkr� | \} } |�|��|�tj|| ����|�tj|| ����|�|��t| d��} |�tj|t| ��d z����|�| ��|�|����|� ��S) a� Generates the binary blob of GSOs that is written to the dta file. Parameters ---------- gso_table : dict Ordered dictionary (str, vo) Returns ------- gso : bytes Binary content of dta file to be placed between strl tags Notes ----- Output format depends on dta version. 117 uses two uint32s to express v and o while 118+ uses a uint32 for v and a uint64 for o. r��asciir/r�rr8r8rrB) rr*r�r3r4r=r<rKr2r#r6)rrJr7�gso�gso_type�null�v_type�o_type�len_type�strl�vor�r�� utf8_strings rU� generate_blobzStataStrLWriter.generate_blobD s|��:�i�i���E�7�#�#���;�t���4�c�:�:���{�4�?�S�0�!�4�4����4�#3�3����4�#3�3���?�S�(��!���)�)� � �H�D�"��V�|�|���D�A�q� �I�I�c�N�N�N� �I�I�f�k�&�!�,�,� -� -� -� �I�I�f�k�&�!�,�,� -� -� -� �I�I�h� � � � ��g�.�.�K� �I�I�f�k�(�C� �,<�,<�q�,@�A�A� B� B� B� �I�I�k� "� "� "� �I�I�d�O�O�O�O��|�|�~�~�rW)rON) r6r$r�r&rRrjr)r�rGr )rIrArGrj)rGrD)rJrPrGr*)r?r@rArBrrCrOr\rCrWrUr5r5� s~��������@� $� &�&�&�&�&�B%�%�%�%�1!�1!�1!�1!�f=�=�=�=�=�=rWr5c����eZdZdZdZdZ d;dd�d<�fd �Zed=d&���Zd>d'�Z d?d@d(�Z dAd)�Z dAd*�Z dAd+�Z dAd,�ZdAd-�ZdAd.�ZdAd/�ZdAd0�ZdAd1�ZdAd2�ZdAd3�ZdAd4�ZdAd5�ZdAd6�ZdBd7�ZdCd:�Z�xZS)D�StataWriter117a� A class for writing Stata binary dta files in Stata 13 format (117) Parameters ---------- fname : path (string), buffer or path object string, path object (pathlib.Path or py._path.local.LocalPath) or object implementing a binary write() functions. If using a buffer then the buffer will not be automatically closed after the file is written. data : DataFrame Input to save convert_dates : dict Dictionary mapping columns containing datetime types to stata internal format to use when writing the dates. Options are 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. Datetime columns that do not have a conversion type specified will be converted to 'tc'. Raises NotImplementedError if a datetime column has timezone information write_index : bool Write the index to Stata dataset. byteorder : str Can be ">", "<", "little", or "big". default is `sys.byteorder` time_stamp : datetime A datetime to use as file creation date. Default is the current time data_label : str A label for the data set. Must be 80 characters or smaller. variable_labels : dict Dictionary containing columns as keys and variable labels as values. Each label must be 80 characters or smaller. convert_strl : list List of columns names to convert to Stata StrL format. Columns with more than 2045 characters are automatically written as StrL. Smaller columns can be converted by including the column name. Using StrLs can reduce output file size when strings are longer than 8 characters, and either frequently repeated or sparse. {compression_options} .. versionchanged:: 1.4.0 Zstandard support. value_labels : dict of dicts Dictionary containing columns as keys and dictionaries of column value to labels as values. The combined length of all labels for a single variable must be 32,000 characters or smaller. .. versionadded:: 1.4.0 Returns ------- writer : StataWriter117 instance The StataWriter117 instance has a write_file method, which will write the file to the given `fname`. Raises ------ NotImplementedError * If datetimes contain timezone information ValueError * Columns listed in convert_dates are neither datetime64[ns] or datetime * Column dtype is not representable in Stata * Column listed in convert_dates is not in DataFrame * Categorical label contains more than 32,000 characters Examples -------- >>> data = pd.DataFrame([[1.0, 1, 'a']], columns=['a', 'b', 'c']) >>> writer = pd.io.stata.StataWriter117('./data_file.dta', data) >>> writer.write_file() Directly write a zip file >>> compression = {"method": "zip", "archive_name": "data_file.dta"} >>> writer = pd.io.stata.StataWriter117( ... './data_file.zip', data, compression=compression ... ) >>> writer.write_file() Or with long strings stored in strl format >>> data = pd.DataFrame([['A relatively long string'], [''], ['']], ... columns=['strls']) >>> writer = pd.io.stata.StataWriter117( ... './data_file_with_long_strings.dta', data, convert_strl=['strls']) >>> writer.write_file() r�rONTr�rwrtrxr�r$r�ryrzr�r)r�rFr{rDrIr��Sequence[Hashable] | Noner�r.r7r�rr|rGr c  ����g|_| �|j�| ��t���||||||||| | | �� � i|_d|_dS)N)r)rFrDrIrr�r7rW)r�r�r�r�_map� _strl_blob)rrtr�r�rzr)rFrDrIr�r�r7rr s �rUrzStataWriter117.__init__� s����".0��� � #� � � %� %�l� 3� 3� 3� ����� � � � ��!�!�+�%�#�+� � � � �%'�� �����rWr"r1�tagrFr*c��t|t��rt|d��}td|zdzd��|ztd|zdzd��zS)zSurround val with <tag></tag>rrUrTz</)r�rFr*)r"rcs rU�_tagzStataWriter117._tag sZ�� �c�3� � � &���W�%�%�C��S�3�Y��_�g�.�.��4�u�T�C�Z�#�=M�w�7W�7W�W�WrWc�n�|jj�J�|jj���|j|<dS)z.Update map location for tag with file positionN)rrr�ra)rrcs rU� _update_mapzStataWriter117._update_map s3���|�"�.�.�.���,�1�1�3�3�� �#���rWc ���|j}|�tdd����t��}|�|�tt |j��d��d����|�|�|dkrdpdd����|jdkrd nd }|�|�tj ||z|j ��d ����|jd krd nd }|�|�tj ||z|j ��d����|� |dd�nd}|� |j ��}|jd krdnd } tj || zt|����} | |z}|�|�|d����|�tj��}n$t#|t��st%d���gd�} d�t'| ��D��} |�d��| |jz|�d��z} dt| d��z}|�|�|d����|�|�|���d����dS)zWrite the file headerz <stata_dta>r�releaserT�MSF�LSFr)r'r2r8�KrOr��NNr�r�r/r:r�r�c� �i|] \}}|dz|�� SrQrCr�s rUrz0StataWriter117._write_header.<locals>.<dictcomp>= rrWrr�� timestamp�header)r4r�r*rr2rerF� _dta_versionr�r3r�r�r$rr#rrr�rurrr^r6)rrDrFr)r7� nvar_type� nobs_sizer:� encoded_label� label_size� label_lenrrr �stata_tss rUr�zStataWriter117._write_header s��� �O� � ���%� �w�7�7�8�8�8��i�i�� � � �$�)�)�E�#�d�&7�"8�"8�'�B�B�I�N�N�O�O�O� � � �$�)�)�I��,�6��?�%��M�M�N�N�N��,��3�3�C�C�� � � � �$�)�)�F�K� �I�(=�t�y�I�I�3�O�O�P�P�P��,��3�3�C�C�� � � � �$�)�)�F�K� �I�(=�t�y�I�I�3�O�O�P�P�P�#-�#9� �3�B�3���r��� � �T�^�4�4� � �-��4�4�S�S�#� ��K� �J� 6��M�8J�8J�K�K� �!�M�1� � � � �$�)�)�M�7�3�3�4�4�4� � �!����J�J��J��1�1� C��A�B�B� B�  �  �  ��H�G�Y�v�5F�5F�G�G�G� � � � �� &� &��:�+�,� -��!�!�+�.�.� /� � �U�2�w�/�/�/�� � � �$�)�)�H�k�2�2�3�3�3� ���$�)�)�C�L�L�N�N�H�=�=�>�>�>�>�>rWc���|js2d|jj���ddddddddddddd�|_|jj�|jd��t ��}|j���D]2}|�tj |j dz|�����3|� |� |� ��d����dS)z� Called twice during file write. The first populates the values in the map with 0s. The second call writes the final map locations when all blocks have been written. r)� stata_data�map�variable_types�varnames�sortlist�formats�value_label_namesrI�characteristicsr��strlsr�stata_data_close� end-of-filer{r�N)rarrr�rtrrwr2r�r3r4r�rer6)rr7r"s rUr�zStataWriter117._write_mapH s��� �y� ���|�*�/�/�1�1�"#����%&�#$�#$��� !�$%� ���D�I�" � �� � ���5�!1�2�2�2��i�i���9�#�#�%�%� ?� ?�C� �I�I�f�k�$�/�C�"7��=�=� >� >� >� >� ���$�)�)�C�L�L�N�N�E�:�:�;�;�;�;�;rWc�6�|�d��t��}|jD]2}|�t j|jdz|�����3|�|�|� ��d����dS)Nr|r2) rgrr�r2r�r3r4r�rer6)rr7r�s rUr�z$StataWriter117._write_variable_typesf s��� ���)�*�*�*��i�i���<� ?� ?�C� �I�I�f�k�$�/�C�"7��=�=� >� >� >� >� ���$�)�)�C�L�L�N�N�4D�E�E�F�F�F�F�FrWc��|�d��t��}|jdkrdnd}|jD]_}|�|��}t |dd��|j��|dz��}|�|���`|� |� |� ��d����dS)Nr}rOr.r0rB) rgrrrr`r r3r$rr2r�rer6)rr7�vn_lenr�s rUr�zStataWriter117._write_varnamesm s��� ����$�$�$��i�i���(�C�/�/���S���L� � �D��+�+�D�1�1�D�!�$�s��s�)�"2�"2�4�>�"B�"B�F�Q�J�O�O�D� �I�I�d�O�O�O�O� ���$�)�)�C�L�L�N�N�J�?�?�@�@�@�@�@rWc��|�d��|jdkrdnd}|�|�d|z|jdzzd����dS)Nr~rPr�r�r,rB)rgrrr�rer�)r� sort_sizes rUr�zStataWriter117._write_sortlistx sd�� ����$�$�$��*�S�0�0�A�A�a� � ���$�)�)�G�i�$7�4�9�q�=�$I�:�V�V�W�W�W�W�WrWc�j�|�d��t��}|jdkrdnd}|jD]=}|�t |�|j��|�����>|�|� |� ��d����dS)NrrOr�r�) rgrrrr.r2r3r$rr�rer6)rr7�fmt_lenrEs rUr�zStataWriter117._write_formats} s��� ����#�#�#��i�i���)�S�0�0�"�"�b���<� K� K�C� �I�I�n�S�Z�Z���%?�%?��I�I� J� J� J� J� ���$�)�)�C�L�L�N�N�I�>�>�?�?�?�?�?rWc��|�d��t��}|jdkrdnd}t|j��D]{}d}|j|r |j|}|�|��}t|dd�� |j ��|dz��}|� |���||� |� |���d����dS)Nr�rOr.r0r�rB)rgrrrr5r�r�r`r r3r$rr2r�rer6)rr7�vl_lenr-r�� encoded_names rUr�z'StataWriter117._write_value_label_names� s��� ���,�-�-�-��i�i���(�C�/�/���S���t�y�!�!� $� $�A��D��%�a�(� '��|�A����+�+�D�1�1�D�)�$�s��s�)�*:�*:�4�>�*J�*J�F�UV�J�W�W�L� �I�I�l� #� #� #� #� ���$�)�)�C�L�L�N�N�4G�H�H�I�I�I�I�IrWc�n�|�d��t��}|jdkrdnd}td|dz��}|j�it |j��D]}|�|���|�|� |� ��d����dS|j D]�}||jvr�|j|}t|��dkrtd��� |�|j��}n*#t $r}td|j����|�d}~wwxYw|�t||dz������|�|����|�|� |� ��d����dS) NrIrOr�i@r�rBrzDVariable labels must contain only characters that can be encoded in )rgrrrr3r}r5r�r2r�rer6r�r#rur$r�UnicodeEncodeError) rr7r�rr�r�r:r�r�s rUr�z%StataWriter117._write_variable_labels� s��� ���*�+�+�+��i�i���(�C�/�/���S���r�6�A�:�.�.�� � � (��4�9�%�%� !� !��� � �%� � � � � � � �d�i�i�� � ���8I�J�J� K� K� K� �F��9� !� !�C��d�+�+�+��-�c�2���u�:�:��?�?�$�%U�V�V�V��#�l�l�4�>�:�:�G�G��)����$�>�-1�^�>�>������������� � � �.��&�1�*�=�=�>�>�>�>�� � �%� � � � � ���$�)�)�C�L�L�N�N�4E�F�F�G�G�G�G�Gs�8D� D:�D5�5D:c��|�d��|�|�dd����dS)Nr�rW)rgr�rer_s rUr�z%StataWriter117._write_characteristics� s@�� ���*�+�+�+� ���$�)�)�C�):�;�;�<�<�<�<�<rWc���|�d��|�d��|�|�����|�d��dS)Nr�s<data>s</data>)rgr�r&r's rUr�zStataWriter117._write_data� sb�� ���� � � � ���)�$�$�$� ���'�/�/�+�+�,�,�,� ���*�%�%�%�%�%rWc��|�d��|�|�|jd����dS)Nr�)rgr�rerbr_s rUr�zStataWriter117._write_strls� s@�� ����!�!�!� ���$�)�)�D�O�W�=�=�>�>�>�>�>rWc��dS)zNo-op in dta 117+NrCr_s rUr�z&StataWriter117._write_expansion_fields� r�rWc�`�|�d��t��}|jD]G}|�|j��}|�|d��}|�|���H|�|�|���d����dS)Nr�lbl) rgrr�r<r4rer2r�r6)rr7r'�labs rUr�z"StataWriter117._write_value_labels� s��� ����(�(�(��i�i���$� � �B��)�)�$�/�:�:�C��)�)�C��'�'�C� �I�I�c�N�N�N�N� ���$�)�)�C�L�L�N�N�N�C�C�D�D�D�D�DrWc��|�d��|�tdd����|�d��dS)Nr�z </stata_dta>rr�)rgr�r*r_s rUr�z$StataWriter117._write_file_close_tag� sO�� ���+�,�,�,� ���%���8�8�9�9�9� ����'�'�'�'�'rWc��|j���D]2\}}||jvr$|j�|��}||j|<�3dS)z� Update column names for conversion to strl if they might have been changed to comply with Stata naming rules N)r�rKr�rO)r�orig�newr s rUr�z!StataWriter117._update_strl_names� sb�� �.�4�4�6�6� .� .�I�D�#��t�)�)�)��(�.�.�t�4�4��*-��"�3�'�� .� .rWc�����fd�t|��D��}|rJt||�j���}|���\}}|}|�|���_|S)zg Convert columns to StrLs if either very large or in the convert_strl variable c�N��g|]!\}}�j|dks |�jv�|��"S)r�)r�r�)rRr-r�rs �rUrVz1StataWriter117._convert_strls.<locals>.<listcomp>� sE��� � � ���3��|�A��%�'�'�3�$�2D�+D�+D� �+D�+D�+DrWrQ)rr5rrrOr\rb)rr�� convert_cols�ssw�tab�new_datas` rUrzStataWriter117._convert_strls� s����  � � � �#�D�/�/� � � � � � 5�!�$� �d�>O�P�P�P�C��.�.�0�0�M�C���D�!�/�/��4�4�D�O�� rWr�r'c�J�g|_g|_|���D]\}}||jv}t ||j||j|���}|j�|��|j�t||j||������dS)N)rmrn) r�r.rKr�rrr�rrr%r0)rr�r�r�rnrEs rUr�z%StataWriter117._set_formats_and_types� s����� ��� � �,�,�.�.� � �J�C���� 2�2�J�-��� �#�� �-�%� ���C� �L� � �� $� $� $� �L� � �(��� �#�� �K�K� � � � � � rW) NTNNNNNr�N)rtrxr�r$r�ryrzr�r)r�rFr{rDr�rIryr�r_r�r.r7r�rr|rGr )r"r1rcrFrGr*)rcrFrGr r,r-r>rKr+)r?r@rArBr�rrrr.rergr�r�r�r�r�r�r�r�r�r�r�r�r�r�r�rr�rNrOs@rUr^r^� sB�������S�S�j���L� 59� � $�&*�!%�6:�26�*1�15�#�AE�#�#�#�#�#�#�#�#�J�X�X�X��\�X� 4�4�4�4�"&�&*�8?�8?�8?�8?�8?�t<�<�<�<�<G�G�G�G� A� A� A� A�X�X�X�X� @�@�@�@� J� J� J� J�H�H�H�H�@=�=�=�=�&�&�&�&� ?�?�?�?� � � � �E�E�E�E�(�(�(�(� .� .� .� .�����$�������rWr^c�X��eZdZUdZdZded< d'dd�d(�fd#�Zd)d&�Z�xZS)*�StataWriterUTF8u� Stata binary dta file writing in Stata 15 (118) and 16 (119) formats DTA 118 and 119 format files support unicode string data (both fixed and strL) format. Unicode is also supported in value labels, variable labels and the dataset label. Format 119 is automatically used if the file contains more than 32,767 variables. Parameters ---------- fname : path (string), buffer or path object string, path object (pathlib.Path or py._path.local.LocalPath) or object implementing a binary write() functions. If using a buffer then the buffer will not be automatically closed after the file is written. data : DataFrame Input to save convert_dates : dict, default None Dictionary mapping columns containing datetime types to stata internal format to use when writing the dates. Options are 'tc', 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. Datetime columns that do not have a conversion type specified will be converted to 'tc'. Raises NotImplementedError if a datetime column has timezone information write_index : bool, default True Write the index to Stata dataset. byteorder : str, default None Can be ">", "<", "little", or "big". default is `sys.byteorder` time_stamp : datetime, default None A datetime to use as file creation date. Default is the current time data_label : str, default None A label for the data set. Must be 80 characters or smaller. variable_labels : dict, default None Dictionary containing columns as keys and variable labels as values. Each label must be 80 characters or smaller. convert_strl : list, default None List of columns names to convert to Stata StrL format. Columns with more than 2045 characters are automatically written as StrL. Smaller columns can be converted by including the column name. Using StrLs can reduce output file size when strings are longer than 8 characters, and either frequently repeated or sparse. version : int, default None The dta version to use. By default, uses the size of data to determine the version. 118 is used if data.shape[1] <= 32767, and 119 is used for storing larger DataFrames. {compression_options} .. versionchanged:: 1.4.0 Zstandard support. value_labels : dict of dicts Dictionary containing columns as keys and dictionaries of column value to labels as values. The combined length of all labels for a single variable must be 32,000 characters or smaller. .. versionadded:: 1.4.0 Returns ------- StataWriterUTF8 The instance has a write_file method, which will write the file to the given `fname`. Raises ------ NotImplementedError * If datetimes contain timezone information ValueError * Columns listed in convert_dates are neither datetime64[ns] or datetime * Column dtype is not representable in Stata * Column listed in convert_dates is not in DataFrame * Categorical label contains more than 32,000 characters Examples -------- Using Unicode data and column names >>> from pandas.io.stata import StataWriterUTF8 >>> data = pd.DataFrame([[1.0, 1, 'ᴬ']], columns=['a', 'β', 'ĉ']) >>> writer = StataWriterUTF8('./data_file.dta', data) >>> writer.write_file() Directly write a zip file >>> compression = {"method": "zip", "archive_name": "data_file.dta"} >>> writer = StataWriterUTF8('./data_file.zip', data, compression=compression) >>> writer.write_file() Or with long strings stored in strl format >>> data = pd.DataFrame([['ᴀ relatively long ŝtring'], [''], ['']], ... columns=['strls']) >>> writer = StataWriterUTF8('./data_file_with_long_strings.dta', data, ... convert_strl=['strls']) >>> writer.write_file() rzLiteral['utf-8']rNTr�rwrtrxr�r$r�ryrzr�r)r�rFr{rDrIr�r_rRr�r�r.r7r�rr|rGr c ���| �|jddkrdnd} n9| dvrtd���| dkr |jddkrtd���t���||||||||| | | | �� � | |_dS) NrBi�r'rP)r'rPz"version must be either 118 or 119.zKYou must use version 119 for data sets containing more than32,767 variables) r�rzr)rFrDrIrr�r�r7)r�rur�rrr)rrtr�r�rzr)rFrDrIr�rRr�r7rr s �rUrzStataWriterUTF8.__init__ds����" �?�!�Z��]�e�3�3�c�c��G�G� �J� &� &��A�B�B� B� ��^�^�� �1� �� 5� 5��#��� � ����� � �'�#��!�!�+�%�%�#�+� � � � �$����rWr�rFc���|D]u}t|��dkr*|dks|dkr|dks|dkr|dks|dkr|dks dt|��cxkrd ksn|d vr|�|d��}�v|S) a� Validate variable names for Stata export. Parameters ---------- name : str Variable name Returns ------- str The validated name with invalid characters replaced with underscores. Notes ----- Stata 118+ support most unicode characters. The only limitation is in the ascii range where the characters supported are a-z, A-Z, 0-9 and _. r0r�r�r�r�r�r�r���>�×�÷)rr�r�s rUr�z'StataWriterUTF8._validate_variable_name�s���*� ,� ,�A���F�F�S�L�L��S���A��G�G��S���A��G�G��S���A��G�G��S����#�a�&�&�&�&�&�&�3�&�&�&�&�� �$�$��|�|�A�s�+�+���� rW) NTNNNNNNr�N)rtrxr�r$r�ryrzr�r)r�rFr{rDr�rIryr�r_rRr�r�r.r7r�rr|rGr r*) r?r@rArBrrqrr�rNrOs@rUr�r�s���������^�^�@#*�I�)�)�)�)� 59� � $�&*�!%�6:�26�"�*1�15�*$�AE�*$�*$�*$�*$�*$�*$�*$�*$�X#�#�#�#�#�#�#�#rWr�)rDr'rErFrGr'rK)r6r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�r�rPr�r�r.r7r�rGrQ)rUrFrGrF)r�r r�rjrGr )rErFrGrl)r�r�r`rarGr�)r�rlr;r'rGrj)r�F)r;r'rmrjrnr�rGrF)r�rlr;r'rnr�rGrj)r�r1r�rjrGr*)vrB� __future__r� collectionsrrr�iorr�r�r�typingrr r r r r r��numpyr�� pandas._libsr�pandas._libs.libr�pandas._libs.writersr� pandas.errorsrrrr�pandas.util._decoratorsrr�pandas.util._exceptionsr�pandas.core.dtypes.baser�pandas.core.dtypes.commonrrr�pandas.core.dtypes.dtypesr�pandasrrrr r!r"r#�pandas.core.framer$�pandas.core.indexes.baser%�pandas.core.indexes.ranger&�pandas.core.seriesr'�pandas.core.shared_docsr(�pandas.io.commonr)�collections.abcr*r+�typesr,r-�pandas._typingr.r/r0r1r2r3r_�_statafile_processing_params1�_statafile_processing_params2�_chunksize_params�_iterator_params� _reader_notes�_read_stata_docrMrLr�rCrqr�r�r�r�r�r�r�rrrEr�ry�Iteratorr�rTrr4r_rfrlrrrvr0r3r5r^r�rCrWrU�<module>r�s��� � � �#�"�"�"�"�"��������������������� � � � � � � � � � � � �������������������������������(�(�(�(�(�(�5�5�5�5�5�5������������� ��������5�4�4�4�4�4�2�2�2�2�2�2����������� 7�6�6�6�6�6�������������������(�'�'�'�'�'�*�*�*�*�*�*�0�0�0�0�0�0�%�%�%�%�%�%�0�0�0�0�0�0�'�'�'�'�'�'�����������$�#�#�#�#�#�����������������������4��!N�� !G��$�� "��(� �8��8�8� �!8�8�"�#8�8�$�%8�8�&�%�&�)=�=�'8�8�(�� �)8�8�>�?8�8�8��t �� � �� � � �������������%�&����� ��������$O�N�N� ��X�d�A�q�)�)� �)�)�)�)�]�]�]�]�@h7�h7�h7�h7�V(������������ #������ �� � � � �)������y�y�y�y�xv�v�v�v�v�v�v�v�r%�%�%�%�%�O�%�%�%�>C�C�C�C�C�C�C�C�L\ �\ �\ �\ �\ �\ �\ �\ �~|&�|&�|&�|&�|&�+�s�|�|&�|&�|&�~ ��/����!%� �!� �$(�#� ��&-�-1�!�!�!�!�!���!�HD�D�D�D�0�0�0�0�C�C�C�C�2 � � � �!G�!G�!G�!G�JGL�*G�*G�*G�*G�*G�Z�� �!2�3�$�%:�;�g�E����K B�K B�K B�K B�K B�+�K B�K B� ��K B�\'G�'G�'G�'G�T1�1�1�1�r�r�r�r�r�r�r�r�jz�z�z�z�z�[�z�z�z�z r�r�r�r�r�n�r�r�r�r�rrW
Memory