in HowTo/gRPC/Linux/OpenAI/LangChain/PyServer/venv/Lib/numpy/lib/npyio.py [0:0]
def _read(fname, *, delimiter=',', comment='#', quote='"',
          imaginary_unit='j', usecols=None, skiplines=0,
          max_rows=None, converters=None, ndmin=None, unpack=False,
          dtype=np.float64, encoding="bytes"):
    r"""
    Read a NumPy array from a text file.

    Parameters
    ----------
    fname : str, os.PathLike, file object or iterable of lines
        The filename or the file to be read.  Anything that is not a
        string or path is consumed through ``iter(fname)``.
    delimiter : str, optional
        Field delimiter of the fields in line of the file.
        Default is a comma, ','.  If None any sequence of whitespace is
        considered a delimiter.
    comment : str or sequence of str or None, optional
        Character that begins a comment.  All text from the comment
        character to the end of the line is ignored.
        Multiple comments or multiple-character comment strings are
        supported, but may be slower and `quote` must be None if used.
        Use None to disable all use of comments.
    quote : str or None, optional
        Character that is used to quote string fields.  Default is '"'
        (a double quote).  Use None to disable quote support.
    imaginary_unit : str, optional
        Character that represents the imaginary unit `sqrt(-1)`.
        Must have length 1.  Default is 'j'.
    usecols : int or array_like, optional
        A single column number or a one-dimensional sequence of integer
        column numbers.  These are the columns from the file to be
        included in the array.  If this value is not given, all the
        columns are used.
    skiplines : int, optional
        Number of lines to skip before interpreting the data in the file.
        Must be a non-negative integer.
    max_rows : int, optional
        Maximum number of rows of data to read.  Default is to read the
        entire file.
    converters : dict or callable, optional
        A function to parse all columns strings into the desired value, or
        a dictionary mapping column number to a parser function.
        E.g. if column 0 is a date string: ``converters = {0: datestr2num}``.
        Converters can also be used to provide a default value for missing
        data, e.g. ``converters = lambda s: float(s.strip() or 0)`` will
        convert empty fields to 0.
        Default: None
    ndmin : int, optional
        Minimum dimension of the array returned.
        Allowed values are 0, 1 or 2.  The value is validated by
        ``_ensure_ndmin_ndarray_check_param``.
        NOTE(review): the signature default is ``None`` -- confirm that the
        validator accepts it, or pass an explicit value.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = read(...)``.  When used with a structured
        data-type, a list with one array per field is returned.
        Default is False.
    dtype : numpy data type
        A NumPy dtype instance, can be a structured dtype to map to the
        columns of the file.  Must not be None.  The unsized legacy dtypes
        ``S0``, ``U0``, ``M8`` and ``m8`` are read as object arrays in
        chunks and then cast to the requested dtype.
    encoding : str, optional
        Encoding used to decode the inputfile.  The special value 'bytes'
        (the default) enables backwards-compatible behavior for
        `converters`, ensuring that inputs to the converter functions are
        encoded bytes objects.  The special value 'bytes' has no additional
        effect if ``converters=None``.  If encoding is ``'bytes'`` or
        ``None``, the default system encoding is used.

    Returns
    -------
    ndarray or list of ndarray
        NumPy array.  With ``unpack=True`` the transposed array, or, for a
        structured dtype, a list holding one array per field.

    Raises
    ------
    TypeError
        If `dtype` is None, or if several comment strings are given and
        `delimiter` is one of them.
    ValueError
        If a comment string is empty, if `quote` is not None while multiple
        or multi-character comments are used, if `imaginary_unit` does not
        have length 1, or if `fname` is neither a string/path nor iterable.

    Warns
    -----
    UserWarning
        If the input contained no data.

    Examples
    --------
    NOTE(review): the examples call ``read_from_filename``, which is not
    defined in the visible source -- presumably an earlier name for this
    reader; confirm before relying on them as doctests.

    First we create a file for the example.

    >>> s1 = '1.0,2.0,3.0\n4.0,5.0,6.0\n'
    >>> with open('example1.csv', 'w') as f:
    ...     f.write(s1)
    >>> a1 = read_from_filename('example1.csv')
    >>> a1
    array([[1., 2., 3.],
           [4., 5., 6.]])

    The second example has columns with different data types, so a
    one-dimensional array with a structured data type is returned.
    The tab character is used as the field delimiter.

    >>> s2 = '1.0\t10\talpha\n2.3\t25\tbeta\n4.5\t16\tgamma\n'
    >>> with open('example2.tsv', 'w') as f:
    ...     f.write(s2)
    >>> a2 = read_from_filename('example2.tsv', delimiter='\t')
    >>> a2
    array([(1. , 10, b'alpha'), (2.3, 25, b'beta'), (4.5, 16, b'gamma')],
          dtype=[('f0', '<f8'), ('f1', 'u1'), ('f2', 'S5')])

    """
    # Handle special 'bytes' keyword for encoding
    byte_converters = False
    if encoding == 'bytes':
        encoding = None
        byte_converters = True

    if dtype is None:
        raise TypeError("a dtype must be provided.")
    dtype = np.dtype(dtype)

    read_dtype_via_object_chunks = None
    if dtype.kind in 'SUM' and (
            dtype == "S0" or dtype == "U0" or dtype == "M8" or dtype == 'm8'):
        # This is a legacy "flexible" dtype.  We do not truly support
        # parametric dtypes currently (no dtype discovery step in the core),
        # but have to support these for backward compatibility.
        read_dtype_via_object_chunks = dtype
        dtype = np.dtype(object)

    if usecols is not None:
        # Allow usecols to be a single int or a sequence of ints, the C-code
        # handles the rest
        try:
            usecols = list(usecols)
        except TypeError:
            usecols = [usecols]

    _ensure_ndmin_ndarray_check_param(ndmin)

    if comment is None:
        comments = None
    else:
        # A bare string is a single comment marker.  Wrap it first: on a
        # str, `"" in comment` is a substring test (always True) and
        # `tuple(comment)` would split it into characters.
        if isinstance(comment, str):
            comment = [comment]
        # From here on, comments are a sequence of strings.
        if "" in comment:
            raise ValueError(
                "comments cannot be an empty string. Use comments=None to "
                "disable comments."
            )
        comments = tuple(comment)
        comment = None
        if len(comments) == 0:
            comments = None  # No comments at all
        elif len(comments) == 1:
            # If there is only one comment, and that comment has one
            # character, the normal parsing can deal with it just fine.
            if isinstance(comments[0], str) and len(comments[0]) == 1:
                comment = comments[0]
                comments = None
        else:
            # Input validation if there are multiple comment characters
            if delimiter in comments:
                raise TypeError(
                    f"Comment characters '{comments}' cannot include the "
                    f"delimiter '{delimiter}'"
                )

    # `comment` is now a single character or None; `comments` is a tuple
    # (handled by line preprocessing below) or None.
    if comments is not None:
        # Note: An earlier version supported two character comments (and
        # could have been extended to multiple characters); we assume this
        # is rare enough to not optimize for.
        if quote is not None:
            raise ValueError(
                "when multiple comments or a multi-character comment is "
                "given, quotes are not supported.  In this case quotechar "
                "must be set to None.")

    if len(imaginary_unit) != 1:
        raise ValueError('len(imaginary_unit) must be 1.')

    _check_nonneg_int(skiplines)
    if max_rows is not None:
        _check_nonneg_int(max_rows)
    else:
        # Passing -1 to the C code means "read the entire file".
        max_rows = -1

    fh_closing_ctx = contextlib.nullcontext()
    filelike = False
    try:
        if isinstance(fname, os.PathLike):
            fname = os.fspath(fname)
        if isinstance(fname, str):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            if encoding is None:
                encoding = getattr(fh, 'encoding', 'latin1')

            # We opened the file, so we are responsible for closing it.
            fh_closing_ctx = contextlib.closing(fh)
            data = fh
            filelike = True
        else:
            if encoding is None:
                encoding = getattr(fname, 'encoding', 'latin1')
            data = iter(fname)
    except TypeError as e:
        raise ValueError(
            f"fname must be a string, filehandle, list of strings,\n"
            f"or generator. Got {type(fname)} instead.") from e

    with fh_closing_ctx:
        if comments is not None:
            # Multi-comment handling strips comments line by line, so the
            # reader must consume an iterator of lines, not a file object.
            if filelike:
                data = iter(data)
                filelike = False
            data = _preprocess_comments(data, comments, encoding)

        if read_dtype_via_object_chunks is None:
            arr = _load_from_filelike(
                data, delimiter=delimiter, comment=comment, quote=quote,
                imaginary_unit=imaginary_unit,
                usecols=usecols, skiplines=skiplines, max_rows=max_rows,
                converters=converters, dtype=dtype,
                encoding=encoding, filelike=filelike,
                byte_converters=byte_converters)

        else:
            # This branch reads the file into chunks of object arrays and
            # then casts them to the desired actual dtype.  This ensures
            # correct string-length and datetime-unit discovery (like
            # `arr.astype()`).  Due to chunking, certain error reports are
            # less clear, currently.
            if filelike:
                # Cannot chunk when reading from a file object: consume it
                # line by line (same handling as the comments branch above).
                data = iter(data)
                filelike = False

            c_byte_converters = False
            if read_dtype_via_object_chunks == "S":
                c_byte_converters = True  # Use latin1 rather than ascii

            chunks = []
            while max_rows != 0:
                if max_rows < 0:
                    chunk_size = _loadtxt_chunksize
                else:
                    chunk_size = min(_loadtxt_chunksize, max_rows)

                # Request only `chunk_size` rows; passing the full
                # `max_rows` would defeat chunking and could over-read.
                next_arr = _load_from_filelike(
                    data, delimiter=delimiter, comment=comment, quote=quote,
                    imaginary_unit=imaginary_unit,
                    usecols=usecols, skiplines=skiplines,
                    max_rows=chunk_size,
                    converters=converters, dtype=dtype,
                    encoding=encoding, filelike=filelike,
                    byte_converters=byte_converters,
                    c_byte_converters=c_byte_converters)
                # Cast here already.  We hope that this is better even for
                # large files because the storage is more compact.  It could
                # be adapted (in principle the concatenate could cast).
                chunks.append(next_arr.astype(read_dtype_via_object_chunks))

                skiplines = 0  # Only have to skip for first chunk
                if max_rows >= 0:
                    max_rows -= chunk_size
                if len(next_arr) < chunk_size:
                    # There was less data than requested, so we are done.
                    break

            # Need at least one chunk, but if empty, the last one may have
            # the wrong shape.
            if len(chunks) > 1 and len(chunks[-1]) == 0:
                del chunks[-1]
            if len(chunks) == 1:
                arr = chunks[0]
            else:
                arr = np.concatenate(chunks, axis=0)

    # NOTE: ndmin works as advertised for structured dtypes, but normally
    #       these would return a 1D result plus the structured dimension,
    #       so ndmin=2 adds a third dimension even when no squeezing occurs.
    #       A `squeeze=False` could be a better solution (pandas uses
    #       squeeze).
    arr = _ensure_ndmin_ndarray(arr, ndmin=ndmin)

    if arr.shape and arr.shape[0] == 0:
        warnings.warn(
            f'loadtxt: input contained no data: "{fname}"',
            category=UserWarning,
            stacklevel=3
        )

    if not unpack:
        return arr

    # Unpack structured dtypes if requested:
    dt = arr.dtype
    if dt.names is not None:
        # For structured arrays, return an array for each field.
        return [arr[field] for field in dt.names]
    return arr.T