
import bisect
import itertools
import math
import warnings
from collections.abc import Sequence
from typing import cast, Generic, Iterable, TypeVar

from typing_extensions import deprecated

from torch import default_generator, Generator, randperm, Tensor


__all__ = [
    "Dataset",
    "IterableDataset",
    "TensorDataset",
    "StackDataset",
    "ConcatDataset",
    "ChainDataset",
    "Subset",
    "random_split",
]


_T = TypeVar("_T")
_T_co = TypeVar("_T_co", covariant=True)
_T_dict = dict[str, _T_co]
_T_tuple = tuple[_T_co, ...]
_T_stack = TypeVar("_T_stack", _T_tuple, _T_dict)


class Dataset(Generic[_T_co]):
    r"""An abstract class representing a :class:`Dataset`.

    All datasets that represent a map from keys to data samples should subclass
    it. All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses could also optionally overwrite
    :meth:`__len__`, which is expected to return the size of the dataset by many
    :class:`~torch.utils.data.Sampler` implementations and the default options
    of :class:`~torch.utils.data.DataLoader`. Subclasses could also
    optionally implement :meth:`__getitems__` to speed up batched loading of
    samples. This method accepts a list of indices for a batch and returns
    the list of corresponding samples.

    .. note::
      :class:`~torch.utils.data.DataLoader` by default constructs an index
      sampler that yields integral indices.  To make it work with a map-style
      dataset with non-integral indices/keys, a custom sampler must be provided.
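
    Example (a minimal sketch; ``SquaresDataset`` is a hypothetical class used
    only for illustration, not part of torch):

        >>> # xdoctest: +SKIP
        >>> class SquaresDataset(torch.utils.data.Dataset):
        ...     def __init__(self, n):
        ...         self.n = n
        ...
        ...     def __getitem__(self, index):
        ...         return index**2
        ...
        ...     def __len__(self):
        ...         return self.n
        >>> ds = SquaresDataset(4)
        >>> [ds[i] for i in range(len(ds))]
        [0, 1, 4, 9]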
    """

    def __getitem__(self, index) -> _T_co:
        raise NotImplementedError("Subclasses of Dataset should implement __getitem__.")

    # def __getitems__(self, indices: List) -> List[_T_co]:
    # Not implemented to prevent false-positives in fetcher check in
    # torch.utils.data._utils.fetch._MapDatasetFetcher

    def __add__(self, other: "Dataset[_T_co]") -> "ConcatDataset[_T_co]":
        return ConcatDataset([self, other])


class IterableDataset(Dataset[_T_co], Iterable[_T_co]):
    r"""An iterable Dataset.

    All datasets that represent an iterable of data samples should subclass it.
    Such form of datasets is particularly useful when data come from a stream.

    All subclasses should overwrite :meth:`__iter__`, which would return an
    iterator of samples in this dataset.

    When a subclass is used with :class:`~torch.utils.data.DataLoader`, each
    item in the dataset will be yielded from the :class:`~torch.utils.data.DataLoader`
    iterator. When :attr:`num_workers > 0`, each worker process will have a
    different copy of the dataset object, so it is often desired to configure
    each copy independently to avoid having duplicate data returned from the
    workers. :func:`~torch.utils.data.get_worker_info`, when called in a worker
    process, returns information about the worker. It can be used in either the
    dataset's :meth:`__iter__` method or the :class:`~torch.utils.data.DataLoader` 's
    :attr:`worker_init_fn` option to modify each copy's behavior.

    Example 1: splitting workload across all workers in :meth:`__iter__`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> # xdoctest: +SKIP("Fails on MacOS12")
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super().__init__()
        ...         assert end > start, "this example only works with end > start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         worker_info = torch.utils.data.get_worker_info()
        ...         if worker_info is None:  # single-process data loading, return the full iterator
        ...             iter_start = self.start
        ...             iter_end = self.end
        ...         else:  # in a worker process
        ...             # split workload
        ...             per_worker = int(math.ceil((self.end - self.start) / float(worker_info.num_workers)))
        ...             worker_id = worker_info.id
        ...             iter_start = self.start + worker_id * per_worker
        ...             iter_end = min(iter_start + per_worker, self.end)
        ...         return iter(range(iter_start, iter_end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [tensor([3]), tensor([4]), tensor([5]), tensor([6])]

        >>> # xdoctest: +REQUIRES(POSIX)
        >>> # Multi-process loading with two worker processes
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

        >>> # With even more workers
        >>> # xdoctest: +IGNORE_WANT("non deterministic")
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12)))
        [tensor([3]), tensor([5]), tensor([4]), tensor([6])]

    Example 2: splitting workload across all workers using :attr:`worker_init_fn`::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_DATALOADER)
        >>> class MyIterableDataset(torch.utils.data.IterableDataset):
        ...     def __init__(self, start, end):
        ...         super().__init__()
        ...         assert end > start, "this example only works with end > start"
        ...         self.start = start
        ...         self.end = end
        ...
        ...     def __iter__(self):
        ...         return iter(range(self.start, self.end))
        ...
        >>> # should give same set of data as range(3, 7), i.e., [3, 4, 5, 6].
        >>> ds = MyIterableDataset(start=3, end=7)

        >>> # Single-process loading
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0)))
        [3, 4, 5, 6]
        >>>
        >>> # Directly doing multi-process loading yields duplicate data
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2)))
        [3, 3, 4, 4, 5, 5, 6, 6]

        >>> # Define a `worker_init_fn` that configures each dataset copy differently
        >>> def worker_init_fn(worker_id):
        ...     worker_info = torch.utils.data.get_worker_info()
        ...     dataset = worker_info.dataset  # the dataset copy in this worker process
        ...     overall_start = dataset.start
        ...     overall_end = dataset.end
        ...     # configure the dataset to only process the split workload
        ...     per_worker = int(math.ceil((overall_end - overall_start) / float(worker_info.num_workers)))
        ...     worker_id = worker_info.id
        ...     dataset.start = overall_start + worker_id * per_worker
        ...     dataset.end = min(dataset.start + per_worker, overall_end)
        ...

        >>> # Multi-process loading with the custom `worker_init_fn`
        >>> # Worker 0 fetched [3, 4].  Worker 1 fetched [5, 6].
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=2, worker_init_fn=worker_init_fn)))
        [3, 5, 4, 6]

        >>> # With even more workers
        >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12, worker_init_fn=worker_init_fn)))
        [3, 4, 5, 6]
    """

    def __add__(self, other: Dataset[_T_co]):
        return ChainDataset([self, other])

    # No `def __len__(self)` default: subclasses raise `TypeError` when needed.
    # See NOTE [ Lack of Default `__len__` in Python Abstract Base Classes ]


class TensorDataset(Dataset[tuple[Tensor, ...]]):
    r"""Dataset wrapping tensors.

    Each sample will be retrieved by indexing tensors along the first dimension.

    Args:
        *tensors (Tensor): tensors that have the same size of the first dimension.
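
    Example (a minimal sketch: three feature rows paired with three labels):

        >>> # xdoctest: +SKIP
        >>> features = torch.randn(3, 2)
        >>> labels = torch.tensor([0, 1, 0])
        >>> ds = TensorDataset(features, labels)
        >>> sample, label = ds[1]  # both tensors are indexed along dim 0
        >>> len(ds)
        3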
    """

    tensors: tuple[Tensor, ...]

    def __init__(self, *tensors: Tensor) -> None:
        if any(tensors[0].size(0) != tensor.size(0) for tensor in tensors):
            raise AssertionError("Size mismatch between tensors")
        self.tensors = tensors

    def __getitem__(self, index):
        return tuple(tensor[index] for tensor in self.tensors)

    def __len__(self):
        return self.tensors[0].size(0)


class StackDataset(Dataset[_T_stack]):
    r"""Dataset as a stacking of multiple datasets.

    This class is useful to assemble different parts of complex input data, given as datasets.

    Example:
        >>> # xdoctest: +SKIP
        >>> images = ImageDataset()
        >>> texts = TextDataset()
        >>> tuple_stack = StackDataset(images, texts)
        >>> tuple_stack[0] == (images[0], texts[0])
        >>> dict_stack = StackDataset(image=images, text=texts)
        >>> dict_stack[0] == {"image": images[0], "text": texts[0]}
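        >>> # When every stacked dataset also implements ``__getitems__``, a
        >>> # batch of indices is forwarded to it in a single call (the batched
        >>> # path implemented below); otherwise samples are fetched one index
        >>> # at a time.
        >>> dict_stack.__getitems__([0, 1])  # list of two per-sample dicts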

    Args:
        *args (Dataset): Datasets for stacking returned as tuple.
        **kwargs (Dataset): Datasets for stacking returned as dict.
    """

    datasets: tuple | dict

    def __init__(self, *args: Dataset[_T_co], **kwargs: Dataset[_T_co]) -> None:
        if args:
            if kwargs:
                raise ValueError(
                    "Supported either ``tuple``- (via ``args``) or"
                    "``dict``- (via ``kwargs``) like input/output, but both types are given."
                )
            self._length = len(args[0])  # type: ignore[arg-type]
            if any(self._length != len(dataset) for dataset in args):  # type: ignore[arg-type]
                raise ValueError("Size mismatch between datasets")
            self.datasets = args
        elif kwargs:
            tmp = list(kwargs.values())
            self._length = len(tmp[0])  # type: ignore[arg-type]
            if any(self._length != len(dataset) for dataset in tmp):  # type: ignore[arg-type]
                raise ValueError("Size mismatch between datasets")
            self.datasets = kwargs
        else:
            raise ValueError("At least one dataset should be passed")

    def __getitem__(self, index):
        if isinstance(self.datasets, dict):
            return {k: dataset[index] for k, dataset in self.datasets.items()}
        return tuple(dataset[index] for dataset in self.datasets)

    def __getitems__(self, indices: list):
        # add batched sampling support when the parent datasets support it.
        if isinstance(self.datasets, dict):
            dict_batch: list[_T_dict] = [{} for _ in indices]
            for k, dataset in self.datasets.items():
                if callable(getattr(dataset, "__getitems__", None)):
                    items = dataset.__getitems__(indices)  # type: ignore[attr-defined]
                    if len(items) != len(indices):
                        raise ValueError(
                            "Nested dataset's output size mismatch."
                            f" Expected {len(indices)}, got {len(items)}"
                        )
                    for data, d_sample in zip(items, dict_batch, strict=True):
                        d_sample[k] = data
                else:
                    for idx, d_sample in zip(indices, dict_batch, strict=True):
                        d_sample[k] = dataset[idx]
            return dict_batch

        # tuple data
        list_batch: list[list] = [[] for _ in indices]
        for dataset in self.datasets:
            if callable(getattr(dataset, "__getitems__", None)):
                items = dataset.__getitems__(indices)  # type: ignore[attr-defined]
                if len(items) != len(indices):
                    raise ValueError(
                        "Nested dataset's output size mismatch."
                        f" Expected {len(indices)}, got {len(items)}"
                    )
                for data, t_sample in zip(items, list_batch, strict=True):
                    t_sample.append(data)
            else:
                for idx, t_sample in zip(indices, list_batch, strict=True):
                    t_sample.append(dataset[idx])
        tuple_batch: list[_T_tuple] = [tuple(sample) for sample in list_batch]
        return tuple_batch

    def __len__(self):
        return self._length


class ConcatDataset(Dataset[_T_co]):
    r"""Dataset as a concatenation of multiple datasets.

    This class is useful to assemble different existing datasets.
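
    Example (a minimal sketch concatenating two map-style datasets of sizes 2 and 3):

        >>> # xdoctest: +SKIP
        >>> ds = ConcatDataset(
        ...     [TensorDataset(torch.zeros(2)), TensorDataset(torch.ones(3))]
        ... )
        >>> len(ds)
        5
        >>> ds[-1]  # negative indices are supported
        (tensor(1.),)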

    Args:
        datasets (sequence): List of datasets to be concatenated
    """

    datasets: list[Dataset[_T_co]]
    cumulative_sizes: list[int]

    @staticmethod
    def cumsum(sequence):
        r, s = [], 0
        for e in sequence:
            l = len(e)
            r.append(l + s)
            s += l
        return r

    def __init__(self, datasets: Iterable[Dataset]) -> None:
        super().__init__()
        self.datasets = list(datasets)
        if len(self.datasets) == 0:
            raise AssertionError("datasets should not be an empty iterable")
        for d in self.datasets:
            if isinstance(d, IterableDataset):
                raise AssertionError("ConcatDataset does not support IterableDataset")
        self.cumulative_sizes = self.cumsum(self.datasets)

    def __len__(self):
        return self.cumulative_sizes[-1]

    def __getitem__(self, idx):
        if idx < 0:
            if -idx > len(self):
                raise ValueError(
                    "absolute value of index should not exceed dataset length"
                )
            idx = len(self) + idx
        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
        if dataset_idx == 0:
            sample_idx = idx
        else:
            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
        return self.datasets[dataset_idx][sample_idx]

    @property
    @deprecated(
        "`cummulative_sizes` attribute is renamed to `cumulative_sizes`",
        category=FutureWarning,
    )
    def cummulative_sizes(self):
        return self.cumulative_sizes


class ChainDataset(IterableDataset):
    r"""Dataset for chaining multiple :class:`IterableDataset` s.
    This class is useful to assemble different existing dataset streams. The
    chaining operation is done on-the-fly, so concatenating large-scale
    datasets with this class will be efficient.

    Args:
        datasets (iterable of IterableDataset): datasets to be chained together
    rE   r   Nc                 0    t         |           || _        y r$   )ry   r9   rE   )r   rE   r{   s     r    r9   zChainDataset.__init__o  s     r"   c              #   |   K   | j                   D ]'  }t        |t              st        d      |E d {    ) y 7 w)N*ChainDataset only supports IterableDataset)rE   rX   r   r8   )r   rz   s     r    __iter__zChainDataset.__iter__s  s;      	Aa1$%QRRLL	 s   0<:<c                 ~    d}| j                   D ]+  }t        |t              st        d      |t	        |      z  }- |S )Nr   r   )rE   rX   r   r8   rL   )r   totalrz   s      r    rA   zChainDataset.__len__y  sD     	Aa1$%QRRSVOE	 r"   )r(   r)   r*   r+   r   r   r9   r   rC   rA   r   r   s   @r    r   r   d  s1    !'!2 !t ! r"   r   c                       e Zd ZU dZee   ed<   ee   ed<   dee   dee   ddfdZ	d Z
dee   dee   fdZdefd	Zy)
r   z
    Subset of a dataset at specified indices.
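    Example (a minimal sketch keeping the even positions of a 5-element dataset):

        >>> # xdoctest: +SKIP
        >>> ds = Subset(TensorDataset(torch.arange(5)), indices=[0, 2, 4])
        >>> len(ds)
        3
        >>> ds[1]
        (tensor(2),)
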

    Args:
        dataset (Dataset): The whole Dataset
        indices (sequence): Indices in the whole set selected for subset
    rN   r\   r   Nc                      || _         || _        y r$   )rN   r\   )r   rN   r\   s      r    r9   zSubset.__init__  s    r"   c                     t        |t              r*| j                  |D cg c]  }| j                  |    c}   S | j                  | j                  |      S c c}w r$   )rX   rS   rN   r\   )r   ri   is      r    r!   zSubset.__getitem__  sK    c4 <<# >Qa >??||DLL-.. !?s   Ac                    t        t        | j                  dd             r6| j                  j                  |D cg c]  }| j                  |    c}      S |D cg c]  }| j                  | j                  |        c}S c c}w c c}w )Nr^   )ra   rb   rN   r^   r\   )r   r\   ri   s      r    r^   zSubset.__getitems__  sn     GDLL.$?@<<,,7-SCdll3.?-STT?FGDLLc!23GG .TGs   B#Bc                 ,    t        | j                        S r$   )rL   r\   r@   s    r    rA   zSubset.__len__  s    4<<  r"   )r(   r)   r*   r+   r   r   rB   r   rC   r9   r!   rS   r^   rA   r,   r"   r    r   r     sn     U^c] # 4 /
HDI H$u+ H! !r"   r   rN   lengths	generatorr   c           
      Z   t        j                  t        |      d      rt        |      dk  rg }t        |      D ]P  \  }}|dk  s|dkD  rt	        d| d      t        j
                  t        |       |z        }|j                  |       R t        |       t        |      z
  }t        |      D ]  }|t        |      z  }||xx   dz  cc<    |}t        |      D ]&  \  }}	|	dk(  st        j                  d| dd       ( t        |      t        |       k7  rt	        d	      t        t        |      |
      j                         }
t        t        t           |      }t!        t#        j$                  |      |d      D 	cg c]  \  }}	t'        | |
||	z
  |        c}	}S c c}	}w )a  
    Randomly split a dataset into non-overlapping new datasets of given lengths.

    If a list of fractions that sum up to 1 is given,
    the lengths will be computed automatically as
    floor(frac * len(dataset)) for each fraction provided.

    After computing the lengths, if there are any remainders, 1 count will be
    distributed in round-robin fashion to the lengths
    until there are no remainders left.
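
    For example, splitting ``range(10)`` with fractions ``[0.35, 0.65]`` first
    computes ``floor(0.35 * 10) = 3`` and ``floor(0.65 * 10) = 6``; the one
    remaining sample is then handed out round-robin, giving final lengths
    ``[4, 6]``.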

    Optionally fix the generator for reproducible results, e.g.:

    Example:
        >>> # xdoctest: +SKIP
        >>> generator1 = torch.Generator().manual_seed(42)
        >>> generator2 = torch.Generator().manual_seed(42)
        >>> random_split(range(10), [3, 7], generator=generator1)
        >>> random_split(range(30), [0.3, 0.3, 0.4], generator=generator2)

    Args:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths or fractions of splits to be produced
        generator (Generator): Generator used for the random permutation.
    """
    if math.isclose(sum(lengths), 1) and sum(lengths) <= 1:
        subset_lengths: list[int] = []
        for i, frac in enumerate(lengths):
            if frac < 0 or frac > 1:
                raise ValueError(f"Fraction at index {i} is not between 0 and 1")
            n_items_in_split = int(
                math.floor(len(dataset) * frac)  # type: ignore[arg-type]
            )
            subset_lengths.append(n_items_in_split)
        remainder = len(dataset) - sum(subset_lengths)  # type: ignore[arg-type]
        # add 1 to all the lengths in round-robin fashion until the remainder is 0
        for i in range(remainder):
            idx_to_add_at = i % len(subset_lengths)
            subset_lengths[idx_to_add_at] += 1
        lengths = subset_lengths
        for i, length in enumerate(lengths):
            if length == 0:
                warnings.warn(
                    f"Length of split at index {i} is 0. "
                    f"This might result in an empty dataset.",
                    stacklevel=2,
                )

    # Cannot verify that dataset is Sized
    if sum(lengths) != len(dataset):  # type: ignore[arg-type]
        raise ValueError(
            "Sum of input lengths does not equal the length of the input dataset!"
        )

    indices = randperm(sum(lengths), generator=generator).tolist()  # type: ignore[arg-type, call-overload]
    lengths = cast(Sequence[int], lengths)
    return [
        Subset(dataset, indices[offset - length : offset])
        for offset, length in zip(itertools.accumulate(lengths), lengths, strict=True)
    ]