runtime/under-io-module.cpp (1,642 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
#include "under-io-module.h"
#include "builtins.h"
#include "bytes-builtins.h"
#include "byteslike.h"
#include "file.h"
#include "frame.h"
#include "globals.h"
#include "int-builtins.h"
#include "modules.h"
#include "object-builtins.h"
#include "objects.h"
#include "os.h"
#include "runtime.h"
#include "str-builtins.h"
#include "thread.h"
#include "type-builtins.h"
#include "unicode.h"
namespace py {
// Type guard: succeeds with `None` when the first argument is an instance of
// `BytesIO`, otherwise raises the "requires type" error for `BytesIO`.
RawObject FUNC(_io, _BytesIO_guard)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (thread->runtime()->isInstanceOfBytesIO(*receiver)) {
    return NoneType::object();
  }
  return thread->raiseRequiresType(receiver, ID(BytesIO));
}
// Type-and-state guard for `BytesIO`: raises the "requires type" error when
// the first argument is not a `BytesIO` instance, raises ValueError when the
// stream is closed, and returns `None` otherwise.
RawObject FUNC(_io, _BytesIO_closed_guard)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  bool is_bytes_io = thread->runtime()->isInstanceOfBytesIO(*receiver);
  if (!is_bytes_io) {
    return thread->raiseRequiresType(receiver, ID(BytesIO));
  }
  BytesIO bytes_io(&scope, *receiver);
  if (!bytes_io.closed()) {
    return NoneType::object();
  }
  return thread->raiseWithFmt(LayoutId::kValueError,
                              "I/O operation on closed file.");
}
// Native implementation of `BytesIO.seek(offset, whence)`.
//
// Returns `Unbound` when `offset` or `whence` is not an int instance.
// Otherwise raises if `self` is not a `BytesIO` instance or is closed, moves
// the stream position and returns the new position as a SmallInt:
//   whence == 0: `offset` is absolute; a negative value raises ValueError.
//   whence == 1: relative to the current position, clamped to 0.
//   whence == 2: relative to the length of the backing buffer, clamped to 0.
// Any other `whence` raises ValueError, or OverflowError when the value does
// not fit in a SmallInt. An `offset` outside the SmallInt range raises
// OverflowError.
RawObject FUNC(_io, _BytesIO_seek)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object offset_obj(&scope, args.get(1));
  if (!runtime->isInstanceOfInt(*offset_obj)) {
    return Unbound::object();
  }
  Object whence_obj(&scope, args.get(2));
  if (!runtime->isInstanceOfInt(*whence_obj)) {
    return Unbound::object();
  }
  Object self_obj(&scope, args.get(0));
  if (!runtime->isInstanceOfBytesIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(BytesIO));
  }
  BytesIO self(&scope, *self_obj);
  if (self.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Int offset_int(&scope, intUnderlying(*offset_obj));
  // Saturate first so that arbitrarily large ints are rejected by the range
  // check below instead of being truncated.
  word offset = offset_int.asWordSaturated();
  if (!SmallInt::isValid(offset)) {
    return thread->raiseWithFmt(
        LayoutId::kOverflowError,
        "cannot fit offset into an index-sized integer");
  }
  Int whence_int(&scope, intUnderlying(*whence_obj));
  word whence = whence_int.asWordSaturated();
  word result;
  switch (whence) {
    case 0:
      if (offset < 0) {
        // `offset` is a `word`; format it with `%w` (as done for `whence`
        // below) so the full-width value is read from the argument list.
        return thread->raiseWithFmt(LayoutId::kValueError,
                                    "Negative seek value %w", offset);
      }
      self.setPos(offset);
      return SmallInt::fromWord(offset);
    case 1:
      result = Utils::maximum(word{0}, self.pos() + offset);
      self.setPos(result);
      return SmallInt::fromWord(result);
    case 2:
      result = Utils::maximum(
          word{0}, MutableBytes::cast(self.buffer()).length() + offset);
      self.setPos(result);
      return SmallInt::fromWord(result);
    default:
      // A saturated `whence` outside the SmallInt range came from an int that
      // was too large; report that as an overflow rather than a bad value.
      if (SmallInt::isValid(whence)) {
        return thread->raiseWithFmt(LayoutId::kValueError,
                                    "Invalid whence (%w, should be 0, 1 or 2)",
                                    whence);
      }
      return thread->raiseWithFmt(LayoutId::kOverflowError,
                                  "Python int too large to convert to C long");
  }
}
// Native implementation of `BytesIO.truncate(size)`.
//
// Raises if `self` is not a `BytesIO` instance or is closed. A `size` of
// `None` means the current stream position. Otherwise `size` must hold a
// SmallInt or Bool value (OverflowError if not) and be non-negative
// (ValueError if not). When `size` is smaller than the current number of
// items, both the item count and the position are set to `size`. Returns
// `size` as a SmallInt.
// NOTE(review): `intUnderlying()` is applied without an int-instance check
// here; presumably the caller guarantees `size` is None or an int -- confirm.
RawObject FUNC(_io, _BytesIO_truncate)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfBytesIO(*self)) {
    return thread->raiseRequiresType(self, ID(BytesIO));
  }
  BytesIO bytes_io(&scope, *self);
  if (bytes_io.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Object size_obj(&scope, args.get(1));
  word size;
  if (size_obj.isNoneType()) {
    size = bytes_io.pos();
  } else {
    if (size_obj.isError()) return *size_obj;
    Int size_int(&scope, intUnderlying(*size_obj));
    // Allow SmallInt, Bool, and subclasses of Int containing SmallInt or Bool
    if (!size_int.isSmallInt() && !size_int.isBool()) {
      return thread->raiseWithFmt(LayoutId::kOverflowError,
                                  "cannot fit '%T' into an index-sized integer",
                                  &size_int);
    }
    size = size_int.asWord();
    if (size < 0) {
      // `size` is a `word`, so it must be formatted with `%w`.
      return thread->raiseWithFmt(LayoutId::kValueError,
                                  "negative size value %w", size);
    }
  }
  if (size < bytes_io.numItems()) {
    bytes_io.setNumItems(size);
    bytes_io.setPos(size);
  }
  return SmallInt::fromWord(size);
}
// Type-and-state guard for `StringIO`: raises the "requires type" error when
// the first argument is not a `StringIO` instance, raises ValueError when the
// stream is closed, and returns `None` otherwise.
RawObject FUNC(_io, _StringIO_closed_guard)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfStringIO(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(StringIO));
  }
  StringIO string_io(&scope, *receiver);
  if (!string_io.closed()) {
    return NoneType::object();
  }
  return thread->raiseWithFmt(LayoutId::kValueError,
                              "I/O operation on closed file.");
}
// Native implementation of `StringIO.seek(offset, whence)`.
//
// Returns `Unbound` when `offset` or `whence` is not an int instance.
// Otherwise raises if `self` is not a `StringIO` instance or is closed, then:
//   whence == 0: sets the position to `offset` (ValueError if negative) and
//                returns the original `offset` object.
//   whence == 1: only `offset == 0` is allowed (OSError otherwise); returns
//                the current position without changing it.
//   whence == 2: only `offset == 0` is allowed (OSError otherwise); moves to
//                the length of the backing buffer and returns it.
// Any other `whence` raises ValueError, or OverflowError when the value does
// not fit in a SmallInt. An `offset` outside the SmallInt range raises
// OverflowError.
RawObject FUNC(_io, _StringIO_seek)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object offset_obj(&scope, args.get(1));
  Object whence_obj(&scope, args.get(2));
  Runtime* runtime = thread->runtime();
  // Both arguments are verified to be ints here, so no further type check on
  // `whence` is needed below.
  if (!runtime->isInstanceOfInt(*offset_obj) ||
      !runtime->isInstanceOfInt(*whence_obj)) {
    return Unbound::object();
  }
  Object self_obj(&scope, args.get(0));
  if (!runtime->isInstanceOfStringIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(StringIO));
  }
  StringIO self(&scope, *self_obj);
  if (self.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  word offset = intUnderlying(*offset_obj).asWordSaturated();
  if (!SmallInt::isValid(offset)) {
    return thread->raiseWithFmt(
        LayoutId::kOverflowError,
        "cannot fit offset into an index-sized integer");
  }
  word whence = intUnderlying(*whence_obj).asWordSaturated();
  switch (whence) {
    case 0:
      if (offset < 0) {
        // `offset` is a `word`; format it with `%w` (as done for `whence`
        // below) so the full-width value is read from the argument list.
        return thread->raiseWithFmt(LayoutId::kValueError,
                                    "Negative seek position %w", offset);
      }
      self.setPos(offset);
      return *offset_obj;
    case 1:
      if (offset != 0) {
        return thread->raiseWithFmt(LayoutId::kOSError,
                                    "Can't do nonzero cur-relative seeks");
      }
      return SmallInt::fromWord(self.pos());
    case 2: {
      if (offset != 0) {
        return thread->raiseWithFmt(LayoutId::kOSError,
                                    "Can't do nonzero end-relative seeks");
      }
      word new_pos = MutableBytes::cast(self.buffer()).length();
      self.setPos(new_pos);
      return SmallInt::fromWord(new_pos);
    }
    default:
      // A saturated `whence` outside the SmallInt range came from an int that
      // was too large; report that as an overflow rather than a bad value.
      if (SmallInt::isValid(whence)) {
        return thread->raiseWithFmt(LayoutId::kValueError,
                                    "Invalid whence (%w, should be 0, 1 or 2)",
                                    whence);
      }
      return thread->raiseWithFmt(LayoutId::kOverflowError,
                                  "Python int too large to convert to C long");
  }
}
// Allocates a fresh, uninitialized read buffer of `bufferSize()` bytes,
// installs it on `buffered_reader`, resets the read position and the count of
// buffered bytes to 0, and returns the new buffer.
static RawObject initReadBuf(Thread* thread,
                             const BufferedReader& buffered_reader) {
  HandleScope scope(thread);
  word capacity = buffered_reader.bufferSize();
  Runtime* runtime = thread->runtime();
  MutableBytes fresh_buf(&scope,
                         runtime->newMutableBytesUninitialized(capacity));
  buffered_reader.setReadBuf(*fresh_buf);
  buffered_reader.setReadPos(0);
  buffered_reader.setBufferNumBytes(0);
  return *fresh_buf;
}
// Prepares the read buffer for appending more data and returns it:
// - if some bytes were already consumed (read position > 0), the unconsumed
//   bytes are moved to the start of the buffer and the read position is reset
//   to 0;
// - otherwise, if no buffer has been allocated yet, one is allocated;
// - otherwise the existing buffer is returned unchanged.
static RawObject rewindOrInitReadBuf(Thread* thread,
                                     const BufferedReader& buffered_reader) {
  HandleScope scope(thread);
  Object current_buf(&scope, buffered_reader.readBuf());
  word consumed = buffered_reader.readPos();
  if (consumed <= 0) {
    if (current_buf.isNoneType()) {
      return initReadBuf(thread, buffered_reader);
    }
    return *current_buf;
  }
  // Slide the unread tail down to index 0.
  MutableBytes buf(&scope, *current_buf);
  word remaining = buffered_reader.bufferNumBytes() - consumed;
  buf.replaceFromWithStartAt(0, *buf, remaining, consumed);
  buffered_reader.setBufferNumBytes(remaining);
  buffered_reader.setReadPos(0);
  return *buf;
}
// Performs a single `read()` call on `raw_file`, asking for as many bytes as
// still fit into `buffer` after the first `*buffer_num_bytes` bytes, and
// appends the returned data to `buffer`.
//
// Result protocol:
// - `Unbound`: data was appended and `*buffer_num_bytes` was advanced.
// - `None`: `read()` returned `None`; nothing was appended.
// - empty bytes: `read()` returned zero bytes; nothing was appended.
// - an error object: `read()` raised, `raw_file` has no `read` attribute
//   (AttributeError, or ValueError when `raw_file` is `None`), or the result
//   was neither bytes nor bytearray (TypeError).
static RawObject fillBuffer(Thread* thread, const Object& raw_file,
                            const MutableBytes& buffer,
                            word* buffer_num_bytes) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  word wanted = buffer.length() - *buffer_num_bytes;
  Object wanted_obj(&scope, SmallInt::fromWord(wanted));
  Object read_result(&scope,
                     thread->invokeMethod2(raw_file, ID(read), wanted_obj));
  if (read_result.isError()) {
    if (read_result.isErrorException()) return *read_result;
    if (read_result.isErrorNotFound()) {
      if (!raw_file.isNoneType()) {
        Object method_name(&scope, runtime->symbols()->at(ID(read)));
        return objectRaiseAttributeError(thread, raw_file, method_name);
      }
      return thread->raiseWithFmt(LayoutId::kValueError,
                                  "raw stream has been detached");
    }
  }
  if (read_result.isNoneType()) return NoneType::object();

  // Extract the payload bytes and their count from bytes or bytearray.
  Bytes payload(&scope, Bytes::empty());
  word payload_length;
  if (runtime->isInstanceOfBytes(*read_result)) {
    payload = bytesUnderlying(*read_result);
    payload_length = payload.length();
  } else if (runtime->isInstanceOfBytearray(*read_result)) {
    Bytearray array(&scope, *read_result);
    payload = array.items();
    payload_length = array.numItems();
  } else if (runtime->isByteslike(*read_result)) {
    UNIMPLEMENTED("byteslike");
  } else {
    return thread->raiseWithFmt(LayoutId::kTypeError,
                                "read() should return bytes");
  }

  if (payload_length == 0) return Bytes::empty();
  if (wanted != -1 && payload_length > wanted) {
    UNIMPLEMENTED("read() returned too many bytes");
  }
  buffer.replaceFromWithBytes(*buffer_num_bytes, *payload, payload_length);
  *buffer_num_bytes += payload_length;
  return Unbound::object();
}
// Helper function for read requests that ask for more bytes than are currently
// buffered (see the DCHECK below), including "read everything" requests, which
// are signalled by `num_bytes == kMaxWord`.
//
// Repeatedly calls `read()` on the underlying raw object, collects the
// returned chunks in a list and finally concatenates the still-buffered bytes
// followed by all chunks into one immutable bytes object. The read buffer is
// marked empty once its bytes were copied into the result.
//
// Returns `None` when `read()` returns `None` before any data is available,
// returns the (empty) object produced by `read()` when it yields zero bytes
// before any data is available, and returns an error object when `read()`
// raises, is missing, or returns something that is neither bytes nor
// bytearray.
static RawObject readBig(Thread* thread, const BufferedReader& buffered_reader,
word num_bytes) {
HandleScope scope(thread);
Runtime* runtime = thread->runtime();
word available = buffered_reader.bufferNumBytes() - buffered_reader.readPos();
DCHECK(num_bytes == kMaxWord || num_bytes > available,
"num_bytes should be big");
// TODO(T59000373): We could specialize this to avoid the intermediate
// allocations when the size of the result is known and `readinto` is
// available.
// `length` tracks the size of the final result: buffered bytes plus chunks.
word length = available;
// `chunks` stays `None` until the first non-empty chunk arrives.
Object chunks(&scope, NoneType::object());
Object chunk(&scope, NoneType::object());
Object raw_file(&scope, buffered_reader.underlying());
Bytes bytes(&scope, Bytes::empty());
for (;;) {
// Unbounded reads ask for 32 KiB at a time; bounded reads ask for what is
// still missing beyond the buffered bytes.
// NOTE(review): `num_bytes` is decremented per chunk below but the loop only
// stops at `num_bytes <= 0`; with `available > 0` this looks like it issues
// one extra `read(0)` after the request is satisfied -- confirm intent.
word wanted = (num_bytes == kMaxWord) ? 32 * kKiB : num_bytes - available;
Object wanted_int(&scope, SmallInt::fromWord(wanted));
Object result_obj(&scope,
thread->invokeMethod2(raw_file, ID(read), wanted_int));
if (result_obj.isError()) {
if (result_obj.isErrorException()) return *result_obj;
if (result_obj.isErrorNotFound()) {
if (raw_file.isNoneType()) {
return thread->raiseWithFmt(LayoutId::kValueError,
"raw stream has been detached");
}
Object name(&scope, runtime->symbols()->at(ID(read)));
return objectRaiseAttributeError(thread, raw_file, name);
}
}
// `None` from read(): stop reading; report `None` only if nothing at all
// has been gathered.
if (result_obj.isNoneType()) {
if (length == 0) return NoneType::object();
break;
}
word chunk_length;
if (runtime->isInstanceOfBytes(*result_obj)) {
bytes = bytesUnderlying(*result_obj);
chunk = *bytes;
chunk_length = bytes.length();
} else if (runtime->isInstanceOfBytearray(*result_obj)) {
Bytearray byte_array(&scope, *result_obj);
bytes = byte_array.items();
chunk = *byte_array;
chunk_length = byte_array.numItems();
} else if (runtime->isByteslike(*result_obj)) {
UNIMPLEMENTED("byteslike");
} else {
return thread->raiseWithFmt(LayoutId::kTypeError,
"read() should return bytes");
}
// A zero-length chunk ends the loop; if nothing was gathered the empty
// chunk itself is the result.
if (chunk_length == 0) {
if (length == 0) return *chunk;
break;
}
if (chunk_length > wanted) {
UNIMPLEMENTED("read() returned too many bytes");
}
if (chunks.isNoneType()) {
chunks = runtime->newList();
}
List list(&scope, *chunks);
runtime->listAdd(thread, list, chunk);
length += chunk_length;
if (num_bytes != kMaxWord) {
num_bytes -= chunk_length;
if (num_bytes <= 0) break;
}
}
// Assemble the result: buffered bytes first, then every collected chunk.
MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length));
word idx = 0;
if (available > 0) {
result.replaceFromWithStartAt(idx,
MutableBytes::cast(buffered_reader.readBuf()),
available, buffered_reader.readPos());
idx += available;
// The buffered bytes are consumed now; mark the buffer as empty.
buffered_reader.setReadPos(0);
buffered_reader.setBufferNumBytes(0);
}
if (!chunks.isNoneType()) {
List list(&scope, *chunks);
for (word i = 0, num_items = list.numItems(); i < num_items; i++) {
chunk = list.at(i);
word chunk_length;
// Chunks were stored either as bytes or as the original bytearray.
if (chunk.isBytes()) {
bytes = *chunk;
chunk_length = bytes.length();
} else {
Bytearray byte_array(&scope, *chunk);
bytes = byte_array.items();
chunk_length = byte_array.numItems();
}
result.replaceFromWithBytes(idx, *bytes, chunk_length);
idx += chunk_length;
}
}
DCHECK(idx == length, "mismatched length");
return result.becomeImmutable();
}
// Discards everything currently held in the read buffer of a `BufferedReader`
// by resetting the read position and the buffered byte count to 0. The buffer
// object itself is left in place. Returns `None`, or raises the "requires
// type" error when `self` is not a `BufferedReader` instance.
RawObject FUNC(_io, _buffered_reader_clear_buffer)(Thread* thread,
                                                   Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBufferedReader(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(BufferedReader));
  }
  BufferedReader reader(&scope, *receiver);
  reader.setReadPos(0);
  reader.setBufferNumBytes(0);
  return NoneType::object();
}
// Initializes (or re-initializes) the buffering state of a `BufferedReader`:
// stores the buffer size taken from the second argument and resets the read
// position and buffered byte count to 0.
//
// Raises the "requires type" error when `self` is not a `BufferedReader`
// instance, OverflowError when the size does not hold a SmallInt or Bool
// value, and ValueError when a read buffer already exists whose length
// differs from the requested size. Returns `None` on success.
RawObject FUNC(_io, _buffered_reader_init)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfBufferedReader(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(BufferedReader));
  }
  BufferedReader reader(&scope, *receiver);
  Int size_int(&scope, intUnderlying(args.get(1)));
  bool is_index_sized = size_int.isSmallInt() || size_int.isBool();
  if (!is_index_sized) {
    return thread->raiseWithFmt(LayoutId::kOverflowError,
                                "cannot fit value into an index-sized integer");
  }
  word requested_size = size_int.asWord();
  DCHECK(requested_size > 0, "invalid buffer size");
  reader.setBufferSize(requested_size);
  reader.setReadPos(0);
  reader.setBufferNumBytes(0);
  // The read buffer starts out as `None` and is only allocated on demand, so
  // a lone `read()` of the whole stream never pays for it. When `_init` is
  // re-run to clear the buffer as part of `seek`, a buffer may already exist;
  // it has to match the requested size.
  Object existing_buf(&scope, reader.readBuf());
  if (existing_buf.isNoneType()) {
    return NoneType::object();
  }
  if (MutableBytes::cast(*existing_buf).length() == requested_size) {
    return NoneType::object();
  }
  return thread->raiseWithFmt(LayoutId::kValueError, "length mismatch");
}
// Native implementation of `BufferedReader.peek(num_bytes)`.
//
// Returns the bytes currently buffered after the read position without
// advancing the read position. When `num_bytes` is <= 0 or larger than the
// number of buffered bytes, a single `fillBuffer()` attempt is made first.
// Raises the "requires type" error when `self` is not a `BufferedReader`
// instance, propagates errors from the index conversion and from the raw
// read, and raises OverflowError when `num_bytes` does not hold a SmallInt or
// Bool value.
RawObject FUNC(_io, _buffered_reader_peek)(Thread* thread, Arguments args) {
// TODO(T58490915): Investigate what thread safety guarantees python has,
// and add locking code as necessary.
HandleScope scope(thread);
Runtime* runtime = thread->runtime();
Object self_obj(&scope, args.get(0));
if (!runtime->isInstanceOfBufferedReader(*self_obj)) {
return thread->raiseRequiresType(self_obj, ID(BufferedReader));
}
BufferedReader self(&scope, *self_obj);
Object num_bytes_obj(&scope, args.get(1));
// TODO(T59004416) Is there a way to push intFromIndex() towards managed?
Object num_bytes_int_obj(&scope, intFromIndex(thread, num_bytes_obj));
if (num_bytes_int_obj.isErrorException()) return *num_bytes_int_obj;
Int num_bytes_int(&scope, intUnderlying(*num_bytes_int_obj));
if (!num_bytes_int.isSmallInt() && !num_bytes_int.isBool()) {
return thread->raiseWithFmt(LayoutId::kOverflowError,
"cannot fit value into an index-sized integer");
}
word num_bytes = num_bytes_int.asWord();
word buffer_num_bytes = self.bufferNumBytes();
word read_pos = self.readPos();
Object read_buf_obj(&scope, self.readBuf());
// Number of buffered bytes that have not been consumed yet.
word available = buffer_num_bytes - read_pos;
if (num_bytes <= 0 || num_bytes > available) {
// Perform a lightweight "reset" of the read buffer that does not move data
// around.
if (read_buf_obj.isNoneType()) {
// No buffer allocated yet; initReadBuf() also zeroes the reader's
// position and byte count, matching the local values (both are 0 here
// only if the reader state was already empty -- presumably always the case
// when no buffer exists; confirm).
read_buf_obj = initReadBuf(thread, self);
} else if (available == 0) {
// Everything was consumed: restart at the beginning of the buffer.
buffer_num_bytes = 0;
read_pos = 0;
self.setReadPos(0);
self.setBufferNumBytes(0);
}
// Attempt a single read to fill the buffer.
MutableBytes read_buf(&scope, *read_buf_obj);
Object raw_file(&scope, self.underlying());
Object fill_result(
&scope, fillBuffer(thread, raw_file, read_buf, &buffer_num_bytes));
// Only exceptions abort; the `None`/empty results of fillBuffer() simply
// leave the buffered byte count unchanged.
if (fill_result.isErrorException()) return *fill_result;
self.setBufferNumBytes(buffer_num_bytes);
available = buffer_num_bytes - read_pos;
}
// Return all unconsumed buffered bytes; the read position is not advanced.
Bytes read_buf(&scope, *read_buf_obj);
return bytesSubseq(thread, read_buf, read_pos, available);
}
// Native implementation of `BufferedReader.read(num_bytes)`.
//
// `num_bytes` of `None` or -1 means "read everything" and is represented
// internally as `kMaxWord`; other negative values raise ValueError. The
// request is served through one of these paths, in order:
// 1. entirely from the bytes already buffered;
// 2. for "read everything": via the raw object's `readall()` if it exists;
// 3. for requests larger than half the buffer size (and for "read everything"
//    without `readall()`): via readBig();
// 4. otherwise by filling the read buffer with fillBuffer() until enough bytes
//    are available or the raw read reports no more data.
RawObject FUNC(_io, _buffered_reader_read)(Thread* thread, Arguments args) {
// TODO(T58490915): Investigate what thread safety guarantees python has,
// and add locking code as necessary.
HandleScope scope(thread);
Runtime* runtime = thread->runtime();
Object self_obj(&scope, args.get(0));
if (!runtime->isInstanceOfBufferedReader(*self_obj)) {
return thread->raiseRequiresType(self_obj, ID(BufferedReader));
}
BufferedReader self(&scope, *self_obj);
Object num_bytes_obj(&scope, args.get(1));
word num_bytes;
if (num_bytes_obj.isNoneType()) {
num_bytes = kMaxWord;
} else {
// TODO(T59004416) Is there a way to push intFromIndex() towards managed?
Object num_bytes_int_obj(&scope, intFromIndex(thread, num_bytes_obj));
if (num_bytes_int_obj.isErrorException()) return *num_bytes_int_obj;
Int num_bytes_int(&scope, intUnderlying(*num_bytes_int_obj));
if (!num_bytes_int.isSmallInt() && !num_bytes_int.isBool()) {
return thread->raiseWithFmt(
LayoutId::kOverflowError,
"cannot fit value into an index-sized integer");
}
num_bytes = num_bytes_int.asWord();
if (num_bytes == -1) {
num_bytes = kMaxWord;
} else if (num_bytes < 0) {
return thread->raiseWithFmt(LayoutId::kValueError,
"read length must be non-negative or -1");
}
}
word buffer_num_bytes = self.bufferNumBytes();
word read_pos = self.readPos();
// Number of buffered bytes that have not been consumed yet.
word available = buffer_num_bytes - read_pos;
DCHECK(available >= 0, "invalid state");
// Path 1: the request can be satisfied from the buffer alone.
// NOTE(review): for `num_bytes == 0` on a reader whose buffer was never
// allocated, `readBuf()` is presumably still `None` here -- confirm that
// constructing the `Bytes` handle from it is acceptable.
if (num_bytes <= available) {
word new_read_pos = read_pos + num_bytes;
self.setReadPos(new_read_pos);
Bytes read_buf(&scope, self.readBuf());
return bytesSubseq(thread, read_buf, read_pos, num_bytes);
}
Object raw_file(&scope, self.underlying());
// Path 2: "read everything" using the raw object's readall(), if present.
if (num_bytes == kMaxWord) {
Object readall_result(&scope, thread->invokeMethod1(raw_file, ID(readall)));
if (readall_result.isErrorException()) return *readall_result;
if (!readall_result.isErrorNotFound()) {
Bytes bytes(&scope, Bytes::empty());
word bytes_length;
if (readall_result.isNoneType()) {
// `None` from readall(): report `None` unless buffered bytes remain.
if (available == 0) return NoneType::object();
bytes_length = 0;
} else if (runtime->isInstanceOfBytes(*readall_result)) {
bytes = bytesUnderlying(*readall_result);
bytes_length = bytes.length();
} else if (runtime->isInstanceOfBytearray(*readall_result)) {
Bytearray byte_array(&scope, *readall_result);
bytes = byte_array.items();
bytes_length = byte_array.numItems();
} else if (runtime->isByteslike(*readall_result)) {
UNIMPLEMENTED("byteslike");
} else {
return thread->raiseWithFmt(LayoutId::kTypeError,
"readall() should return bytes");
}
// Result is the buffered bytes followed by the readall() data.
word length = bytes_length + available;
if (length == 0) return Bytes::empty();
MutableBytes result(&scope,
runtime->newMutableBytesUninitialized(length));
word idx = 0;
if (available > 0) {
result.replaceFromWithStartAt(idx, MutableBytes::cast(self.readBuf()),
available, read_pos);
idx += available;
// The buffered bytes are consumed now; mark the buffer as empty.
self.setReadPos(0);
self.setBufferNumBytes(0);
}
if (bytes_length > 0) {
result.replaceFromWithBytes(idx, *bytes, bytes_length);
idx += bytes_length;
}
DCHECK(idx == length, "length mismatch");
return result.becomeImmutable();
}
}
// Use alternate reading code for big requests where buffering would not help.
// (This is also used for the num_bytes==kMaxWord (aka "readall") case when
// the file object does not provide a "readall" method.)
word buffer_size = self.bufferSize();
if (num_bytes > (buffer_size / 2)) {
return readBig(thread, self, num_bytes);
}
// Fill buffer until we have enough bytes available.
MutableBytes read_buf(&scope, rewindOrInitReadBuf(thread, self));
// rewindOrInitReadBuf() may have compacted the buffer; re-read the count.
buffer_num_bytes = self.bufferNumBytes();
Object fill_result(&scope, NoneType::object());
do {
fill_result = fillBuffer(thread, raw_file, read_buf, &buffer_num_bytes);
if (fill_result.isErrorException()) return *fill_result;
// Anything other than `Unbound` means no data was appended (`None` or
// empty bytes): return that marker if nothing is buffered, else serve what
// is there.
if (!fill_result.isUnbound()) {
if (buffer_num_bytes == 0) return *fill_result;
break;
}
} while (buffer_num_bytes < num_bytes);
word length = Utils::minimum(buffer_num_bytes, num_bytes);
self.setBufferNumBytes(buffer_num_bytes);
// The buffer was rewound, so the returned bytes start at index 0.
self.setReadPos(length);
Bytes read_buf_bytes(&scope, *read_buf);
return bytesSubseq(thread, read_buf_bytes, 0, length);
}
// Native implementation of `BufferedReader.readline(max_line_bytes)`.
//
// Returns bytes up to and including the next '\n', limited to `max_line_bytes`
// bytes. `None` or -1 means no limit (represented as `kMaxWord`); other
// negative values raise ValueError. The line is first searched for in the
// bytes already buffered; if it does not end there, the buffer is refilled via
// fillBuffer() until a newline is found, the limit is reached, or the raw read
// reports no more data. Lines longer than one buffer are assembled from a
// list of full buffers plus the final partial buffer.
RawObject FUNC(_io, _buffered_reader_readline)(Thread* thread, Arguments args) {
// TODO(T58490915): Investigate what thread safety guarantees Python has,
// and add locking code as necessary.
HandleScope scope(thread);
Runtime* runtime = thread->runtime();
Object self_obj(&scope, args.get(0));
if (!runtime->isInstanceOfBufferedReader(*self_obj)) {
return thread->raiseRequiresType(self_obj, ID(BufferedReader));
}
BufferedReader self(&scope, *self_obj);
Object max_line_bytes_obj(&scope, args.get(1));
word max_line_bytes = kMaxWord;
if (!max_line_bytes_obj.isNoneType()) {
// TODO(T59004416) Is there a way to push intFromIndex() towards managed?
Object max_line_bytes_int_obj(&scope,
intFromIndex(thread, max_line_bytes_obj));
if (max_line_bytes_int_obj.isErrorException()) {
return *max_line_bytes_int_obj;
}
Int max_line_bytes_int(&scope, intUnderlying(*max_line_bytes_int_obj));
if (!max_line_bytes_int.isSmallInt() && !max_line_bytes_int.isBool()) {
return thread->raiseWithFmt(
LayoutId::kOverflowError,
"cannot fit value into an index-sized integer");
}
max_line_bytes = max_line_bytes_int.asWord();
if (max_line_bytes == -1) {
max_line_bytes = kMaxWord;
} else if (max_line_bytes < 0) {
return thread->raiseWithFmt(LayoutId::kValueError,
"read length must be non-negative or -1");
}
}
word buffer_num_bytes = self.bufferNumBytes();
word read_pos = self.readPos();
word available = buffer_num_bytes - read_pos;
// Fast path: try to find the end of the line in the already buffered bytes.
if (available > 0) {
MutableBytes read_buf(&scope, self.readBuf());
word line_end = -1;
word scan_length = available;
if (available >= max_line_bytes) {
// The limit falls inside the buffered bytes: the line ends there at the
// latest.
scan_length = max_line_bytes;
line_end = read_pos + max_line_bytes;
} else {
// All buffered bytes count against the limit for the refill loop below.
max_line_bytes -= available;
}
word newline_index = read_buf.findByte('\n', read_pos, scan_length);
if (newline_index >= 0) {
line_end = newline_index + 1;
}
if (line_end >= 0) {
self.setReadPos(line_end);
Bytes read_buf_bytes(&scope, *read_buf);
return bytesSubseq(thread, read_buf_bytes, read_pos, line_end - read_pos);
}
}
// Slow path: the buffered bytes (if any) are the start of the line. Move them
// to the front of the buffer and keep appending data behind them.
MutableBytes read_buf(&scope, rewindOrInitReadBuf(thread, self));
buffer_num_bytes = self.bufferNumBytes();
word buffer_size = self.bufferSize();
Object raw_file(&scope, self.underlying());
Object fill_result(&scope, NoneType::object());
Object chunks(&scope, NoneType::object());
word line_end = -1;
// Outer loop for the case where a line is longer than a single buffer. In
// that case we will collect the pieces in the `chunks` list.
for (;;) {
// Fill buffer until we find a newline character or filled up the whole
// buffer.
do {
word old_buffer_num_bytes = buffer_num_bytes;
fill_result = fillBuffer(thread, raw_file, read_buf, &buffer_num_bytes);
if (fill_result.isErrorException()) return *fill_result;
// Anything other than `Unbound` means no data was appended: return that
// marker if nothing was gathered at all, otherwise the line ends with
// what is buffered.
if (!fill_result.isUnbound()) {
if (buffer_num_bytes == 0 && chunks.isNoneType()) return *fill_result;
line_end = buffer_num_bytes;
break;
}
// Only the newly appended bytes need to be scanned.
word scan_start = old_buffer_num_bytes;
word scan_length = buffer_num_bytes - old_buffer_num_bytes;
if (scan_length >= max_line_bytes) {
scan_length = max_line_bytes;
line_end = scan_start + max_line_bytes;
} else {
max_line_bytes -= buffer_num_bytes - old_buffer_num_bytes;
}
word newline_index = read_buf.findByte('\n', scan_start, scan_length);
if (newline_index >= 0) {
line_end = newline_index + 1;
break;
}
} while (line_end < 0 && buffer_num_bytes < buffer_size);
if (line_end < 0) {
// The line is longer than the buffer: Add the current buffer to the
// chunks list, create a fresh one and repeat scan loop.
if (chunks.isNoneType()) {
chunks = runtime->newList();
}
List list(&scope, *chunks);
runtime->listAdd(thread, list, read_buf);
// Create a fresh buffer and retry.
read_buf = initReadBuf(thread, self);
buffer_num_bytes = 0;
continue;
}
break;
}
// Total result length: all full buffers in `chunks` plus the first `line_end`
// bytes of the current buffer.
word length = line_end;
if (!chunks.isNoneType()) {
List list(&scope, *chunks);
for (word i = 0, num_items = list.numItems(); i < num_items; i++) {
length += MutableBytes::cast(list.at(i)).length();
}
}
MutableBytes result(&scope, runtime->newMutableBytesUninitialized(length));
word idx = 0;
if (!chunks.isNoneType()) {
List list(&scope, *chunks);
Bytes chunk(&scope, Bytes::empty());
for (word i = 0, num_items = list.numItems(); i < num_items; i++) {
chunk = list.at(i);
word chunk_length = chunk.length();
result.replaceFromWithBytes(idx, *chunk, chunk_length);
idx += chunk_length;
}
}
result.replaceFromWith(idx, *read_buf, line_end);
DCHECK(idx + line_end == length, "length mismatch");
// Bytes after `line_end` stay buffered for the next read.
self.setReadPos(line_end);
self.setBufferNumBytes(buffer_num_bytes);
return result.becomeImmutable();
}
// Guard for `TextIOWrapper` methods that need an attached buffer: raises the
// "requires type" error when `self` is not a `TextIOWrapper` instance, raises
// ValueError when the wrapper has been detached, and returns `None` otherwise.
RawObject FUNC(_io, _TextIOWrapper_attached_guard)(Thread* thread,
                                                   Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfTextIOWrapper(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(TextIOWrapper));
  }
  TextIOWrapper wrapper(&scope, *receiver);
  if (!wrapper.detached()) {
    return NoneType::object();
  }
  return thread->raiseWithFmt(LayoutId::kValueError,
                              "underlying buffer has been detached");
}
// Guard for `TextIOWrapper` methods that need an attached, open buffer.
//
// Raises the "requires type" error when `self` is not a `TextIOWrapper`
// instance and ValueError when the wrapper is detached. When the buffer is a
// `BufferedReader` or `BufferedWriter` instance, raises ValueError if it is
// closed and returns `None` otherwise. For any other buffer type `Unbound` is
// returned.
RawObject FUNC(_io, _TextIOWrapper_attached_closed_guard)(Thread* thread,
                                                          Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfTextIOWrapper(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(TextIOWrapper));
  }
  TextIOWrapper wrapper(&scope, *receiver);
  if (wrapper.detached()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "underlying buffer has been detached");
  }

  // Determine the closed state for the buffer types handled natively.
  Object buffer_obj(&scope, wrapper.buffer());
  bool is_closed;
  if (runtime->isInstanceOfBufferedReader(*buffer_obj)) {
    BufferedReader reader(&scope, *buffer_obj);
    is_closed = reader.closed();
  } else if (runtime->isInstanceOfBufferedWriter(*buffer_obj)) {
    BufferedWriter writer(&scope, *buffer_obj);
    is_closed = writer.closed();
  } else {
    // TODO(T61927696): Add closed check support for other types of buffer
    return Unbound::object();
  }

  if (is_closed) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  return NoneType::object();
}
// Guard for `TextIOWrapper` methods that need an attached, open and seekable
// buffer.
//
// Raises the "requires type" error when `self` is not a `TextIOWrapper`
// instance and ValueError when the wrapper is detached. When the buffer is a
// `BufferedReader` or `BufferedWriter` instance, raises ValueError if it is
// closed or if the wrapper's `seekable()` value is `None` or `False`, and
// returns `None` otherwise. For any other buffer type `Unbound` is returned.
RawObject FUNC(_io,
               _TextIOWrapper_attached_closed_seekable_guard)(Thread* thread,
                                                              Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfTextIOWrapper(*receiver)) {
    return thread->raiseRequiresType(receiver, ID(TextIOWrapper));
  }
  TextIOWrapper wrapper(&scope, *receiver);
  if (wrapper.detached()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "underlying buffer has been detached");
  }

  // Determine the closed state for the buffer types handled natively.
  Object buffer_obj(&scope, wrapper.buffer());
  bool is_closed;
  if (runtime->isInstanceOfBufferedReader(*buffer_obj)) {
    BufferedReader reader(&scope, *buffer_obj);
    is_closed = reader.closed();
  } else if (runtime->isInstanceOfBufferedWriter(*buffer_obj)) {
    BufferedWriter writer(&scope, *buffer_obj);
    is_closed = writer.closed();
  } else {
    // TODO(T61927696): Add closed check support for other types of buffer
    return Unbound::object();
  }
  if (is_closed) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }

  // TODO(T61927696): change this when TextIOWrapper.seekable() returns bool
  Object seekable_obj(&scope, wrapper.seekable());
  if (seekable_obj.isNoneType() || seekable_obj == Bool::falseObj()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "underlying stream is not seekable");
  }
  return NoneType::object();
}
// Fast path for `TextIOWrapper.write(text)`: appends the bytes of `text`
// directly to the write buffer (a Bytearray) of the wrapper's BufferedWriter.
//
// Returns `Unbound` (leaving the write buffer as it was) when this shortcut
// does not apply:
// - the wrapper's buffer is not a BufferedWriter,
// - the encoding is neither "utf-8" nor "UTF-8",
// - `writetranslate` is false or the newline to write is "\r\n",
// - `text` contains a surrogate code point.
// When the newline to write is not "\n", every "\n" in the appended bytes is
// replaced with "\r".
// If line buffering is enabled and `text` contained "\n" or "\r", `flush()` is
// invoked on the BufferedWriter. Afterwards the decoded chars and snapshot of
// the wrapper are cleared and the decoder (if any) is reset.
// Returns `text.length()` as a SmallInt on success.
//
// Raises TypeError when `text` is not a str, the "requires type" error when
// `self` is not a TextIOWrapper instance, and ValueError when the wrapper is
// detached or the BufferedWriter is closed.
//
// NOTE(review): the write buffer is grown as needed; no comparison against
// BufferedWriter.bufferSize() is made here -- confirm that is intended.
// TODO(T61927696): Implement native version of BufferedWriter._flush_unlocked()
// with FileIO as BufferedWriter.raw. With that function, we can do flush in
// here.
RawObject FUNC(_io, _TextIOWrapper_write_UTF8)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object text_obj(&scope, args.get(1));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfStr(*text_obj)) {
    return thread->raiseWithFmt(LayoutId::kTypeError,
                                "write() argument must be str, not %T",
                                &text_obj);
  }
  Object self_obj(&scope, args.get(0));
  if (!runtime->isInstanceOfTextIOWrapper(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(TextIOWrapper));
  }
  TextIOWrapper text_io(&scope, *self_obj);
  if (text_io.detached()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "underlying buffer has been detached");
  }
  Object buffer_obj(&scope, text_io.buffer());
  if (!buffer_obj.isBufferedWriter()) {
    return Unbound::object();
  }
  BufferedWriter buffer(&scope, text_io.buffer());
  if (buffer.closed()) {
    // Use ValueError like every other closed-file check in this file, so the
    // fast path and the guard functions report a closed buffer identically.
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  if (text_io.encoding() != SmallStr::fromCStr("utf-8") &&
      text_io.encoding() != SmallStr::fromCStr("UTF-8")) {
    return Unbound::object();
  }
  Str writenl(&scope, text_io.writenl());
  // Only allow writenl to be cr or lf in this short cut
  if (!text_io.writetranslate() || writenl == SmallStr::fromCStr("\r\n")) {
    return Unbound::object();
  }

  // Optimistically append the raw bytes of `text`; the scan below undoes this
  // with downsize() if the text turns out not to be eligible.
  Str text(&scope, strUnderlying(*text_obj));
  word text_len = text.length();
  Bytearray write_buffer(&scope, buffer.writeBuf());
  word old_len = write_buffer.numItems();
  word new_len = old_len + text_len;
  runtime->bytearrayEnsureCapacity(thread, write_buffer, new_len);
  MutableBytes write_buffer_bytes(&scope, write_buffer.items());
  write_buffer_bytes.replaceFromWithStr(old_len, *text, text_len);
  write_buffer.setNumItems(new_len);

  // Scan for surrogates and line endings. "\n" only needs rewriting when the
  // newline to write is not "\n" itself.
  bool translate_lf = !(writenl == SmallStr::fromCStr("\n"));
  bool hasnl = false;
  for (word offset = 0; offset < text_len;) {
    word num_bytes;
    int32_t codepoint = text.codePointAt(offset, &num_bytes);
    if (Unicode::isSurrogate(codepoint)) {
      write_buffer.downsize(old_len);
      return Unbound::object();
    }
    if (num_bytes == 1) {
      byte current = text.byteAt(offset);
      if (current == '\n') {
        hasnl = true;
        if (translate_lf) {
          write_buffer_bytes.byteAtPut(offset + old_len, byte{'\r'});
        }
      } else if (current == '\r') {
        hasnl = true;
      }
    }
    offset += num_bytes;
  }

  if (text_io.lineBuffering() && hasnl) {
    // TODO(T61927696): Implement native support for
    // BufferedWriter._flush_unlocked to do flush here
    Object flush_result(&scope, thread->invokeMethod1(buffer, ID(flush)));
    if (flush_result.isErrorException()) return *flush_result;
    text_io.setTelling(text_io.seekable());
  }
  text_io.setDecodedChars(Str::empty());
  text_io.setSnapshot(NoneType::object());
  Object decoder_obj(&scope, text_io.decoder());
  if (!decoder_obj.isNoneType()) {
    Object reset_result(&scope, thread->invokeMethod1(decoder_obj, ID(reset)));
    if (reset_result.isErrorException()) return *reset_result;
  }
  return SmallInt::fromWord(text_len);
}
static const BuiltinAttribute kUnderIOBaseAttributes[] = {
{ID(_closed), RawUnderIOBase::kClosedOffset},
};
static const BuiltinAttribute kIncrementalNewlineDecoderAttributes[] = {
{ID(_errors), RawIncrementalNewlineDecoder::kErrorsOffset},
{ID(_translate), RawIncrementalNewlineDecoder::kTranslateOffset},
{ID(_decoder), RawIncrementalNewlineDecoder::kDecoderOffset},
{ID(_seennl), RawIncrementalNewlineDecoder::kSeennlOffset},
{ID(_pendingcr), RawIncrementalNewlineDecoder::kPendingcrOffset},
};
// Builtin attribute table for `_BufferedIOMixin`; handed to addBuiltinType()
// in initializeUnderIOTypes(). The `_raw` attribute is backed by the field at
// RawUnderBufferedIOMixin::kUnderlyingOffset.
static const BuiltinAttribute kUnderBufferedIOMixinAttributes[] = {
{ID(_raw), RawUnderBufferedIOMixin::kUnderlyingOffset},
};
// Builtin attribute table for `BufferedRandom`; handed to addBuiltinType() in
// initializeUnderIOTypes(). Each entry pairs an attribute symbol with the
// offset of the RawBufferedRandom field that backs it.
// NOTE(review): the size attribute is spelled `buffer_size` here but
// `_buffer_size` in kBufferedReaderAttributes -- confirm this is intentional.
static const BuiltinAttribute kBufferedRandomAttributes[] = {
{ID(buffer_size), RawBufferedRandom::kBufferSizeOffset},
{ID(_reader), RawBufferedRandom::kReaderOffset},
{ID(_write_buf), RawBufferedRandom::kWriteBufOffset},
{ID(_write_lock), RawBufferedRandom::kWriteLockOffset},
};
// Builtin attribute table for `BufferedReader`; handed to addBuiltinType() in
// initializeUnderIOTypes(). Each entry pairs an attribute symbol with the
// offset of the RawBufferedReader field that backs it, plus an AttributeFlags
// value.
// NOTE(review): kReadOnly / kHidden presumably restrict how managed code can
// access the attribute -- confirm against the AttributeFlags definition.
static const BuiltinAttribute kBufferedReaderAttributes[] = {
{ID(_buffer_size), RawBufferedReader::kBufferSizeOffset,
AttributeFlags::kReadOnly},
{ID(_buffered_reader__read_buf), RawBufferedReader::kReadBufOffset,
AttributeFlags::kHidden},
{ID(_read_pos), RawBufferedReader::kReadPosOffset,
AttributeFlags::kReadOnly},
{ID(_buffer_num_bytes), RawBufferedReader::kBufferNumBytesOffset,
AttributeFlags::kReadOnly},
};
// Builtin attribute table for `BufferedWriter`; handed to addBuiltinType() in
// initializeUnderIOTypes(). Each entry pairs an attribute symbol with the
// offset of the RawBufferedWriter field that backs it.
static const BuiltinAttribute kBufferedWriterAttributes[] = {
{ID(buffer_size), RawBufferedWriter::kBufferSizeOffset},
{ID(_write_buf), RawBufferedWriter::kWriteBufOffset},
{ID(_write_lock), RawBufferedWriter::kWriteLockOffset},
};
// Builtin attribute table for `BytesIO`; handed to addBuiltinType() in
// initializeUnderIOTypes(). Each entry pairs an attribute symbol with the
// offset of the RawBytesIO field that backs it. The item count is registered
// with AttributeFlags::kReadOnly.
static const BuiltinAttribute kBytesIOAttributes[] = {
{ID(_buffer), RawBytesIO::kBufferOffset},
{ID(_BytesIO__num_items), RawBytesIO::kNumItemsOffset,
AttributeFlags::kReadOnly},
{ID(_pos), RawBytesIO::kPosOffset},
};
// Makes sure the backing buffer of `bytes_io` can hold at least
// `min_capacity` bytes. When the current buffer is already large enough this
// is a no-op; otherwise a larger buffer (sized by Runtime::newCapacity) is
// allocated, the old contents are copied over, the newly added tail is
// zero-filled, and the stream is pointed at the new buffer.
static void bytesIOEnsureCapacity(Thread* thread, const BytesIO& bytes_io,
                                  word min_capacity) {
  DCHECK_BOUND(min_capacity, SmallInt::kMaxValue);
  HandleScope scope(thread);
  MutableBytes old_storage(&scope, bytes_io.buffer());
  const word old_size = old_storage.length();
  if (old_size >= min_capacity) {
    // Nothing to grow.
    return;
  }
  const word grown_size = Runtime::newCapacity(old_size, min_capacity);
  MutableBytes grown_storage(
      &scope, thread->runtime()->newMutableBytesUninitialized(grown_size));
  // Preserve existing data, then clear the uninitialized remainder.
  grown_storage.replaceFromWith(0, *old_storage, old_size);
  const word tail_size = grown_size - old_size;
  grown_storage.replaceFromWithByte(old_size, 0, tail_size);
  bytes_io.setBuffer(*grown_storage);
}
// BytesIO.__init__(self, initial_bytes)
// Resets the stream to an open state positioned at 0. With `None` or the
// empty bytes object the stream starts out empty; otherwise the contents of
// the bytes-like argument are copied into a fresh buffer.
// Raises TypeError if `self` is not a BytesIO or `initial_bytes` is not
// bytes-like (in which case the stream is left untouched).
RawObject METH(BytesIO, __init__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object self(&scope, args.get(0));
  if (!runtime->isInstanceOfBytesIO(*self)) {
    return thread->raiseRequiresType(self, ID(BytesIO));
  }
  BytesIO bytes_io(&scope, *self);
  Object initial_bytes(&scope, args.get(1));
  const bool starts_empty =
      initial_bytes.isNoneType() || initial_bytes == Bytes::empty();
  if (starts_empty) {
    bytes_io.setBuffer(runtime->emptyMutableBytes());
    bytes_io.setNumItems(0);
  } else {
    Byteslike source(&scope, thread, *initial_bytes);
    if (!source.isValid()) {
      return thread->raiseWithFmt(LayoutId::kTypeError,
                                  "a bytes-like object is required, not '%T'",
                                  &initial_bytes);
    }
    const word source_len = source.length();
    MutableBytes storage(&scope,
                         runtime->newMutableBytesUninitialized(source_len));
    storage.replaceFromWithByteslike(0, source, source_len);
    bytes_io.setBuffer(*storage);
    bytes_io.setNumItems(source_len);
  }
  // Common to both paths: rewind and mark the stream as open.
  bytes_io.setPos(0);
  bytes_io.setClosed(false);
  return NoneType::object();
}
// BytesIO.getvalue(self)
// Returns a copy of the first `numItems()` bytes of the stream's buffer.
// Raises TypeError if `self` is not a BytesIO and ValueError if the stream is
// closed.
RawObject METH(BytesIO, getvalue)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfBytesIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(BytesIO));
  }
  BytesIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Bytes contents(&scope, stream.buffer());
  const word used = stream.numItems();
  return runtime->bytesCopyWithSize(thread, contents, used);
}
// BytesIO.read(self, size)
// Returns up to `size` bytes starting at the current position and advances
// the position past them. `None` or a negative `size` reads to the end of the
// stream. Returns empty bytes when the position is at or past the end.
//
// Raises TypeError if `self` is not a BytesIO, ValueError if the stream is
// closed, OverflowError if `size` does not fit in a word, and whatever
// intFromIndex() raises for a non-index `size`.
RawObject METH(BytesIO, read)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object self(&scope, args.get(0));
  if (!runtime->isInstanceOfBytesIO(*self)) {
    return thread->raiseRequiresType(self, ID(BytesIO));
  }
  BytesIO bytes_io(&scope, *self);
  if (bytes_io.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Object size_obj(&scope, args.get(1));
  // Snapshot the stream state before intFromIndex(), which may run managed
  // code.
  MutableBytes buffer(&scope, bytes_io.buffer());
  word size;
  word buffer_len = bytes_io.numItems();
  word pos = bytes_io.pos();
  if (size_obj.isNoneType()) {
    size = buffer_len;
  } else {
    size_obj = intFromIndex(thread, size_obj);
    if (size_obj.isError()) return *size_obj;
    // Allow SmallInt, Bool, and subclasses of Int containing SmallInt or Bool
    Int size_int(&scope, intUnderlying(*size_obj));
    // Check the unwrapped value: `size_obj` may be an int-subclass instance
    // whose underlying value is a LargeInt, which asWord() cannot represent.
    if (size_int.isLargeInt()) {
      return thread->raiseWithFmt(LayoutId::kOverflowError,
                                  "cannot fit '%T' into an index-sized integer",
                                  &size_int);
    }
    if (size_int.asWord() < 0) {
      size = buffer_len;
    } else {
      size = size_int.asWord();
    }
  }
  if (buffer_len <= pos) {
    return Bytes::empty();
  }
  word new_pos = Utils::minimum(buffer_len, pos + size);
  bytes_io.setPos(new_pos);
  Bytes result(&scope, *buffer);
  return bytesSubseq(thread, result, pos, new_pos - pos);
}
// BytesIO.write(self, value)
// Copies the bytes-like `value` into the stream at the current position,
// growing the buffer as needed, and returns the number of bytes written. The
// position moves past the written data and the item count is extended if the
// write went beyond it.
// Raises TypeError if `self` is not a BytesIO or `value` is not bytes-like,
// and ValueError if the stream is closed.
RawObject METH(BytesIO, write)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfBytesIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(BytesIO));
  }
  BytesIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Object value_obj(&scope, args.get(1));
  Byteslike payload(&scope, thread, *value_obj);
  if (!payload.isValid()) {
    return thread->raiseWithFmt(LayoutId::kTypeError,
                                "a bytes-like object is required, not '%T'",
                                &value_obj);
  }
  const word write_start = stream.pos();
  const word payload_len = payload.length();
  const word write_end = write_start + payload_len;
  bytesIOEnsureCapacity(thread, stream, write_end);
  // Re-read the buffer: bytesIOEnsureCapacity may have replaced it.
  MutableBytes::cast(stream.buffer())
      .replaceFromWithByteslike(write_start, payload, payload_len);
  if (stream.numItems() < write_end) {
    stream.setNumItems(write_end);
  }
  stream.setPos(write_end);
  return SmallInt::fromWord(payload_len);
}
// Builtin attribute table for `FileIO`; handed to addBuiltinType() in
// initializeUnderIOTypes(). Each entry pairs an attribute symbol with the
// offset of the RawFileIO field that backs it.
static const BuiltinAttribute kFileIOAttributes[] = {
{ID(_fd), RawFileIO::kFdOffset},
{ID(name), RawFileIO::kNameOffset},
{ID(_created), RawFileIO::kCreatedOffset},
{ID(_readable), RawFileIO::kReadableOffset},
{ID(_writable), RawFileIO::kWritableOffset},
{ID(_appending), RawFileIO::kAppendingOffset},
{ID(_seekable), RawFileIO::kSeekableOffset},
{ID(_closefd), RawFileIO::kCloseFdOffset},
};
// Initial read chunk size used by FileIO.readall when the number of remaining
// bytes cannot be derived from File::seek / File::size.
static const word kDefaultBufferSize = 1 * kKiB; // bytes
// FileIO.readall(self)
// Reads from the file descriptor until File::read reports 0 bytes and returns
// everything read as an immutable bytes object (Bytes::empty() if nothing was
// read).
// Raises TypeError if `self` is not a FileIO, ValueError if the file is
// closed, and an OSError built from errno when File::read fails.
RawObject METH(FileIO, readall)(Thread* thread, Arguments args) {
HandleScope scope(thread);
Runtime* runtime = thread->runtime();
Object self(&scope, args.get(0));
if (!runtime->isInstanceOfFileIO(*self)) {
return thread->raiseRequiresType(self, ID(FileIO));
}
FileIO file_io(&scope, *self);
if (file_io.closed()) {
return thread->raiseWithFmt(LayoutId::kValueError,
"I/O operation on closed file.");
}
Object fd_obj(&scope, file_io.fd());
DCHECK(fd_obj.isSmallInt(), "fd must be small int");
int fd = SmallInt::cast(*fd_obj).value();
// If there is OSError from File::seek or File::size, error will not be
// thrown. This case is handled by the for loop below.
word pos = File::seek(fd, 0, 1);
word end = File::size(fd);
word buffer_size = kDefaultBufferSize;
// When both calls succeeded, size the first read to the remaining bytes plus
// one.
if (end > 0 && pos >= 0 && end >= pos) {
buffer_size = end - pos + 1;
}
// OSError from getting File::seek or File::size, or end < pos
// read buffer by buffer
Bytearray result_array(&scope, runtime->newBytearray());
word read_size;
word total_len = 0;
for (;;) {
read_size = buffer_size;
// Make room for another `buffer_size` bytes after what has been read so far.
runtime->bytearrayEnsureCapacity(thread, result_array,
total_len + buffer_size);
// Raw pointer into the bytearray's storage; it is re-fetched on every
// iteration, after the capacity call above.
byte* dst = reinterpret_cast<byte*>(
MutableBytes::cast(result_array.items()).address());
word result_len = File::read(fd, dst + total_len, read_size);
// A negative result is passed, negated, to raiseOSErrorFromErrno.
if (result_len < 0) {
return thread->raiseOSErrorFromErrno(-result_len);
}
total_len += result_len;
// From the glibc manual: "If read returns at least one character, there
// is no way you can tell whether end-of-file was reached. But if you did
// reach the end, the next read will return zero."
// Therefore, we can't stop when the result_len is less than read_size, as
// we still don't know if there's more input that we're blocked on.
if (result_len == 0) {
if (total_len == 0) {
return Bytes::empty();
}
// TODO(T70612758): Find a way to shorten the MutableBytes object without
// extra allocation
Bytes result_bytes(
&scope, MutableBytes::cast(result_array.items()).becomeImmutable());
// Copy the `total_len` bytes that were actually read into an exactly-sized
// result.
MutableBytes result(&scope,
runtime->newMutableBytesUninitialized(total_len));
dst = reinterpret_cast<byte*>(result.address());
result_bytes.copyTo(dst, total_len);
return result.becomeImmutable();
}
result_array.setNumItems(total_len);
// Double the chunk size only when the total read so far exactly equals the
// current chunk size.
if (total_len == buffer_size) {
buffer_size *= 2;
}
}
}
// Reads up to `dst_len` bytes from `fd` into the memory at `dst`.
// Returns the number of bytes read as a SmallInt; a zero-length destination
// short-circuits to 0 without touching the file descriptor. A negative result
// from File::read is negated and raised through raiseOSErrorFromErrno.
static RawObject readintoBytesAddress(Thread* thread, const int fd, byte* dst,
                                      const word dst_len) {
  if (dst_len != 0) {
    const word num_read = File::read(fd, dst, dst_len);
    if (num_read >= 0) {
      return SmallInt::fromWord(num_read);
    }
    return thread->raiseOSErrorFromErrno(-num_read);
  }
  return SmallInt::fromWord(0);
}
// FileIO.readinto(self, dst)
// Reads bytes from the file descriptor directly into the storage of `dst` and
// returns the number of bytes read. Supported destinations are bytearray,
// array, memoryview (whose buffer is not bytes) and writable mmap.
// Raises TypeError if `self` is not a FileIO or `dst` is not an acceptable
// read-write bytes-like object, ValueError if the file is closed, and OSError
// if the underlying read fails.
// NOTE(review): the array branch passes `length()` as the byte count and the
// memoryview branch treats any non-bytes buffer as a Pointer -- confirm both
// against the Array / MemoryView definitions.
RawObject METH(FileIO, readinto)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object self_obj(&scope, args.get(0));
  if (!runtime->isInstanceOfFileIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(FileIO));
  }
  FileIO file_io(&scope, *self_obj);
  if (file_io.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Object dst_obj(&scope, args.get(1));
  const bool acceptable =
      runtime->isByteslike(*dst_obj) || runtime->isInstanceOfMmap(*dst_obj);
  if (!acceptable) {
    return thread->raiseWithFmt(LayoutId::kTypeError,
                                "Expected bytes-like object, not %T", &dst_obj);
  }
  int fd = SmallInt::cast(file_io.fd()).value();

  // bytearray: read into its item storage.
  if (runtime->isInstanceOfBytearray(*dst_obj)) {
    Bytearray target(&scope, *dst_obj);
    byte* raw = reinterpret_cast<byte*>(
        MutableBytes::cast(target.items()).address());
    return readintoBytesAddress(thread, fd, raw, target.numItems());
  }

  // array: read into its backing buffer.
  if (dst_obj.isArray()) {
    Array target(&scope, *dst_obj);
    byte* raw =
        reinterpret_cast<byte*>(MutableBytes::cast(target.buffer()).address());
    return readintoBytesAddress(thread, fd, raw, target.length());
  }

  // memoryview: reject views over bytes, otherwise read through the pointer.
  if (dst_obj.isMemoryView()) {
    MemoryView view(&scope, *dst_obj);
    dst_obj = view.buffer();
    if (runtime->isInstanceOfBytes(*dst_obj)) {
      return thread->raiseWithFmt(
          LayoutId::kTypeError, "Expected read-write bytes-like object, not %T",
          &view);
    }
    Pointer memory(&scope, *dst_obj);
    byte* raw = reinterpret_cast<byte*>(memory.cptr());
    return readintoBytesAddress(thread, fd, raw, memory.length());
  }

  // mmap: only writable mappings are allowed.
  if (dst_obj.isMmap()) {
    Mmap mapping(&scope, *dst_obj);
    if (!mapping.isWritable()) {
      return thread->raiseWithFmt(
          LayoutId::kTypeError, "Expected read-write bytes-like object, not %T",
          &mapping);
    }
    Pointer memory(&scope, mapping.data());
    byte* raw = reinterpret_cast<byte*>(memory.cptr());
    return readintoBytesAddress(thread, fd, raw, memory.length());
  }

  // Anything left (e.g. bytes) cannot be written into.
  return thread->raiseWithFmt(LayoutId::kTypeError,
                              "Expected read-write bytes-like object, not %T",
                              &dst_obj);
}
// Builtin attribute table for `StringIO`; handed to addBuiltinType() in
// initializeUnderIOTypes(). Each entry pairs an attribute symbol with the
// offset of the RawStringIO field that backs it.
static const BuiltinAttribute kStringIOAttributes[] = {
{ID(_buffer), RawStringIO::kBufferOffset},
{ID(_pos), RawStringIO::kPosOffset},
{ID(_readnl), RawStringIO::kReadnlOffset},
{ID(_readtranslate), RawStringIO::kReadtranslateOffset},
{ID(_readuniversal), RawStringIO::kReaduniversalOffset},
{ID(_seennl), RawStringIO::kSeennlOffset},
{ID(_writenl), RawStringIO::kWritenlOffset},
{ID(_writetranslate), RawStringIO::kWritetranslateOffset},
};
// Bit flags that stringIOWrite ORs into a StringIO's `seennl` value to record
// which kinds of line ending ("\n", "\r", "\r\n") it has encountered while
// read-translation is enabled.
enum NewlineFound { kLF = 0x1, kCR = 0x2, kCRLF = 0x4 };
// Writes `value` into the buffer of `string_io` at its current position,
// growing (and zero-filling) the buffer when needed, and moves the position to
// the end of the written data. Returns the length of `value` as a SmallInt.
//
// Newline handling:
//  - read-translation on: "\r\n" and "\r" are stored as "\n", and the kinds
//    of line ending seen are accumulated into `seennl`;
//  - write-translation on with a write newline other than "\n": every "\n" is
//    stored as the configured write newline (one or two bytes);
//  - otherwise the string is copied verbatim.
static RawObject stringIOWrite(Thread* thread, const StringIO& string_io,
                               const Str& value) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  if (*value == Str::empty()) {
    return SmallInt::fromWord(0);
  }

  Str writenl(&scope, string_io.writenl());
  const bool two_byte_nl = writenl.length() == 2;
  const byte nl_first = writenl.byteAt(0);
  const bool translate_on_write =
      string_io.hasWritetranslate() && nl_first != '\n';
  const word input_len = value.length();

  // The two translation modes are not expected to be active together, so
  // their length adjustments are computed independently.
  word stored_len = input_len;
  if (translate_on_write && two_byte_nl) {
    // Each "\n" expands to two bytes.
    stored_len += value.occurrencesOf(SmallStr::fromCStr("\n"));
  }
  const word start = string_io.pos();
  word end = start + stored_len;
  const bool translate_on_read = string_io.hasReadtranslate();
  if (translate_on_read) {
    // Each "\r\n" collapses to a single byte.
    end -= value.occurrencesOf(SmallStr::fromCStr("\r\n"));
  }

  MutableBytes buffer(&scope, string_io.buffer());
  const word capacity = buffer.length();
  if (capacity < end) {
    MutableBytes grown(&scope, runtime->newMutableBytesUninitialized(end));
    grown.replaceFromWith(0, *buffer, capacity);
    grown.replaceFromWithByte(capacity, 0, end - capacity);
    string_io.setBuffer(*grown);
    buffer = *grown;
  }

  if (translate_on_read) {
    word seen = Int::cast(string_io.seennl()).asWord();
    word src = 0;
    word dst = start;
    while (src < stored_len) {
      const byte ch = value.byteAt(src);
      if (ch != '\r') {
        if (ch == '\n') {
          seen |= NewlineFound::kLF;
        }
        buffer.byteAtPut(dst, ch);
      } else {
        const bool lf_follows =
            stored_len > src + 1 && value.byteAt(src + 1) == '\n';
        if (lf_follows) {
          seen |= NewlineFound::kCRLF;
          // Skip the "\n" half of the pair.
          src++;
        } else {
          seen |= NewlineFound::kCR;
        }
        buffer.byteAtPut(dst, '\n');
      }
      src++;
      dst++;
    }
    string_io.setSeennl(SmallInt::fromWord(seen));
  } else if (translate_on_write) {
    word dst = start;
    for (word src = 0; src < input_len; src++) {
      const byte ch = value.byteAt(src);
      if (ch != '\n') {
        buffer.byteAtPut(dst++, ch);
        continue;
      }
      buffer.byteAtPut(dst++, nl_first);
      if (two_byte_nl) {
        buffer.byteAtPut(dst++, writenl.byteAt(1));
      }
    }
  } else {
    buffer.replaceFromWithStr(start, *value, stored_len);
  }

  string_io.setPos(end);
  return SmallInt::fromWord(input_len);
}
// Returns true when `newline` is one of the string values StringIO accepts
// for its `newline` argument: "", "\n", "\r" or "\r\n".
static bool isValidStringIONewline(const Object& newline) {
  return newline == SmallStr::empty() ||
         newline == SmallStr::fromCodePoint('\n') ||
         newline == SmallStr::fromCodePoint('\r') ||
         newline == SmallStr::fromCStr("\r\n");
}
// StringIO.__init__(self, initial_value, newline)
// Resets the stream to an empty, open state, configures newline handling from
// `newline`, and, when `initial_value` is given, writes it and rewinds to 0.
//
// Newline configuration:
//   None      -> read-translate + universal reads, writes use "\n"
//   ""        -> universal reads only, writes use "\n"
//   otherwise -> write-translate, writes use `newline`
//
// Raises TypeError if `self` is not a StringIO or if `newline` /
// `initial_value` is neither str nor None, and ValueError for an unsupported
// newline string.
RawObject METH(StringIO, __init__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object self(&scope, args.get(0));
  if (!runtime->isInstanceOfStringIO(*self)) {
    return thread->raiseRequiresType(self, ID(StringIO));
  }

  Object newline(&scope, args.get(2));
  const bool newline_is_none = newline == NoneType::object();
  if (!newline_is_none) {
    if (!runtime->isInstanceOfStr(*newline)) {
      return thread->raiseWithFmt(LayoutId::kTypeError,
                                  "newline must be str or None, not %T",
                                  &newline);
    }
    newline = strUnderlying(*newline);
    if (!isValidStringIONewline(newline)) {
      return thread->raiseWithFmt(LayoutId::kValueError,
                                  "illegal newline value: %S", &newline);
    }
  }

  StringIO string_io(&scope, *self);
  string_io.setBuffer(runtime->emptyMutableBytes());
  string_io.setClosed(false);
  string_io.setPos(0);
  string_io.setReadnl(*newline);
  string_io.setSeennl(SmallInt::fromWord(0));

  const bool universal = newline_is_none || newline == Str::empty();
  string_io.setReadtranslate(newline_is_none);
  string_io.setReaduniversal(universal);
  string_io.setWritetranslate(!universal);
  if (universal) {
    string_io.setWritenl(SmallStr::fromCodePoint('\n'));
  } else {
    string_io.setWritenl(*newline);
  }

  Object initial_value_obj(&scope, args.get(1));
  if (initial_value_obj == NoneType::object()) {
    return NoneType::object();
  }
  if (!runtime->isInstanceOfStr(*initial_value_obj)) {
    return thread->raiseWithFmt(LayoutId::kTypeError,
                                "initial_value must be str or None, not %T",
                                &initial_value_obj);
  }
  Str initial_value(&scope, strUnderlying(*initial_value_obj));
  stringIOWrite(thread, string_io, initial_value);
  // Writing advanced the position; start reading from the beginning.
  string_io.setPos(0);
  return NoneType::object();
}
// Advances the position of `string_io` past the next line and returns the new
// position, or -1 (leaving the position untouched) when the current position
// is already at or beyond the end of the buffer.
//
// `size` limits how many bytes may be consumed; a negative value or one that
// reaches past the end of the buffer means "up to the end of the buffer".
//
// In universal-newline mode a line ends at "\n", "\r" or "\r\n". A line
// terminator only counts if it starts within the size limit; a "\r\n" pair
// whose "\r" is the last byte inside the limit is still consumed whole, so
// the pair is never split. In the other mode a line ends at the stream's
// configured read newline.
static word stringIOReadline(Thread* thread, const StringIO& string_io,
                             word size) {
  HandleScope scope(thread);
  MutableBytes buffer(&scope, string_io.buffer());
  word buf_len = buffer.length();
  word start = string_io.pos();
  if (start >= buf_len) {
    return -1;
  }
  bool has_read_universal = string_io.hasReaduniversal();
  bool has_read_translate = string_io.hasReadtranslate();
  Object newline_obj(&scope, string_io.readnl());
  if (has_read_translate) {
    // Translated buffers only ever contain "\n" as a line terminator.
    newline_obj = SmallStr::fromCodePoint('\n');
  }
  Str newline(&scope, *newline_obj);
  if (size < 0 || (size + start) > buf_len) {
    size = buf_len - start;
  }
  // One past the last byte this call is allowed to consume.
  const word limit = start + size;
  word i = start;
  if (has_read_universal) {
    const byte crlf[] = {'\r', '\n'};
    i = buffer.indexOfAny(crlf, start);
    if (i < limit) {
      // A terminator starts inside the limit: consume it.
      byte ch = buffer.byteAt(i++);
      if (ch == '\r' && buf_len > i && buffer.byteAt(i) == '\n') {
        i++;
      }
    } else {
      // No terminator within the limit: stop exactly at the limit instead of
      // running on to the next newline (or the end of the buffer).
      i = limit;
    }
  } else {
    byte first_nl_byte = newline.byteAt(0);
    while (i < limit) {
      word index = buffer.findByte(first_nl_byte, i, (limit - i));
      if (index == -1) {
        i = limit;
        break;
      }
      i = index + 1;
      // Verify the remaining bytes of a multi-byte newline, if they fit.
      if (buf_len >= (i + newline.length() - 1)) {
        bool match = true;
        for (int j = 1; j < newline.length(); j++) {
          if (buffer.byteAt(i + j - 1) != newline.byteAt(j)) {
            match = false;
          }
        }
        if (match) {
          i += (newline.length() - 1);
          break;
        }
      }
    }
  }
  string_io.setPos(i);
  return i;
}
// StringIO.__next__(self)
// Returns the next line of the stream as a str, advancing the position past
// it. Raises StopIteration once the position has reached the end of the
// buffer, TypeError if `self` is not a StringIO, and ValueError if the stream
// is closed.
RawObject METH(StringIO, __next__)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfStringIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(StringIO));
  }
  StringIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  const word line_start = stream.pos();
  const word line_end = stringIOReadline(thread, stream, -1);
  if (line_end == -1) {
    return thread->raise(LayoutId::kStopIteration, NoneType::object());
  }
  Bytes contents(&scope, stream.buffer());
  Bytes line(&scope,
             bytesSubseq(thread, contents, line_start, line_end - line_start));
  return line.becomeStr();
}
// StringIO.close(self)
// Marks the stream as closed and returns None. Raises TypeError if `self` is
// not a StringIO.
RawObject METH(StringIO, close)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object receiver(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (runtime->isInstanceOfStringIO(*receiver)) {
    StringIO stream(&scope, *receiver);
    stream.setClosed(true);
    return NoneType::object();
  }
  return thread->raiseRequiresType(receiver, ID(StringIO));
}
// StringIO.getvalue(self)
// Returns the whole buffer as a str, built from a copy of the stream's bytes.
// Raises TypeError if `self` is not a StringIO and ValueError if the stream is
// closed.
RawObject METH(StringIO, getvalue)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfStringIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(StringIO));
  }
  StringIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Bytes contents(&scope, stream.buffer());
  // Copy first so that converting to str does not affect the live buffer.
  Bytes snapshot(&scope, runtime->bytesCopy(thread, contents));
  return snapshot.becomeStr();
}
// StringIO.read(self, size)
// Returns up to `size` bytes of the buffer, starting at the current position,
// as a str and advances the position past them. `None` or a negative `size`
// reads to the end. Returns the empty str when the position is beyond the end
// of the buffer.
// Raises TypeError if `self` is not a StringIO, ValueError if the stream is
// closed, OverflowError if `size` is not a SmallInt or Bool after index
// conversion, and whatever intFromIndex() raises for a non-index `size`.
RawObject METH(StringIO, read)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfStringIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(StringIO));
  }
  StringIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }

  Object size_obj(&scope, args.get(1));
  word size = -1;
  if (!size_obj.isNoneType()) {
    size_obj = intFromIndex(thread, size_obj);
    if (size_obj.isError()) return *size_obj;
    // TODO(T55084422): have a better abstraction for int to word conversion
    const bool fits_in_word = size_obj.isSmallInt() || size_obj.isBool();
    if (!fits_in_word) {
      return thread->raiseWithFmt(
          LayoutId::kOverflowError,
          "cannot fit value into an index-sized integer");
    }
    size = Int::cast(*size_obj).asWord();
  }

  Bytes contents(&scope, stream.buffer());
  const word start = stream.pos();
  const word end = contents.length();
  if (end < start) {
    return Str::empty();
  }
  // A negative size means "everything that is left".
  word stop = end;
  if (size >= 0) {
    stop = Utils::minimum(end, start + size);
  }
  stream.setPos(stop);
  Bytes chunk(&scope, bytesSubseq(thread, contents, start, stop - start));
  return chunk.becomeStr();
}
// StringIO.readline(self, size)
// Returns the next line (as determined by stringIOReadline with the given
// `size`; `None` is passed on as -1) as a str, or the empty str when the
// position is already at the end of the buffer.
// Raises TypeError if `self` is not a StringIO, ValueError if the stream is
// closed, OverflowError if `size` is not a SmallInt or Bool after index
// conversion, and whatever intFromIndex() raises for a non-index `size`.
RawObject METH(StringIO, readline)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  if (!thread->runtime()->isInstanceOfStringIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(StringIO));
  }
  StringIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }

  Object size_obj(&scope, args.get(1));
  word size = -1;
  if (!size_obj.isNoneType()) {
    size_obj = intFromIndex(thread, size_obj);
    if (size_obj.isError()) return *size_obj;
    // TODO(T55084422): have a better abstraction for int to word conversion
    const bool fits_in_word = size_obj.isSmallInt() || size_obj.isBool();
    if (!fits_in_word) {
      return thread->raiseWithFmt(
          LayoutId::kOverflowError,
          "cannot fit value into an index-sized integer");
    }
    size = Int::cast(*size_obj).asWord();
  }

  const word line_start = stream.pos();
  const word line_end = stringIOReadline(thread, stream, size);
  if (line_end == -1) {
    return Str::empty();
  }
  Bytes contents(&scope, stream.buffer());
  Bytes line(&scope,
             bytesSubseq(thread, contents, line_start, line_end - line_start));
  return line.becomeStr();
}
// StringIO.truncate(self, size)
// Shrinks the buffer to `size` bytes when it is currently longer and returns
// `size`. `None` means "the current position". The buffer is never grown and
// the position is left unchanged.
// Raises TypeError if `self` is not a StringIO, ValueError if the stream is
// closed or `size` is negative, OverflowError if `size` is not a SmallInt or
// Bool after index conversion, and whatever intFromIndex() raises for a
// non-index `size`.
RawObject METH(StringIO, truncate)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Runtime* runtime = thread->runtime();
  Object self_obj(&scope, args.get(0));
  if (!runtime->isInstanceOfStringIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(StringIO));
  }
  StringIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }

  Object size_obj(&scope, args.get(1));
  word size;
  if (!size_obj.isNoneType()) {
    size_obj = intFromIndex(thread, size_obj);
    if (size_obj.isError()) return *size_obj;
    // TODO(T55084422): have a better abstraction for int to word conversion
    const bool fits_in_word = size_obj.isSmallInt() || size_obj.isBool();
    if (!fits_in_word) {
      return thread->raiseWithFmt(
          LayoutId::kOverflowError,
          "cannot fit value into an index-sized integer");
    }
    size = Int::cast(*size_obj).asWord();
    if (size < 0) {
      // NOTE(review): `size` is a word but the format uses %d -- confirm the
      // formatter's expected argument width.
      return thread->raiseWithFmt(LayoutId::kValueError,
                                  "Negative size value %d", size);
    }
  } else {
    size = stream.pos();
  }

  MutableBytes contents(&scope, stream.buffer());
  if (contents.length() > size) {
    MutableBytes shortened(&scope,
                           runtime->newMutableBytesUninitialized(size));
    shortened.replaceFromWith(0, *contents, size);
    stream.setBuffer(*shortened);
  }
  return SmallInt::fromWord(size);
}
// StringIO.write(self, value)
// Writes the str `value` at the current position via stringIOWrite and
// returns its result (the length of `value`).
// Raises TypeError if `self` is not a StringIO or `value` is not a str, and
// ValueError if the stream is closed.
RawObject METH(StringIO, write)(Thread* thread, Arguments args) {
  HandleScope scope(thread);
  Object self_obj(&scope, args.get(0));
  Runtime* runtime = thread->runtime();
  if (!runtime->isInstanceOfStringIO(*self_obj)) {
    return thread->raiseRequiresType(self_obj, ID(StringIO));
  }
  StringIO stream(&scope, *self_obj);
  if (stream.closed()) {
    return thread->raiseWithFmt(LayoutId::kValueError,
                                "I/O operation on closed file.");
  }
  Object text_obj(&scope, args.get(1));
  if (!runtime->isInstanceOfStr(*text_obj)) {
    return thread->raiseRequiresType(text_obj, ID(str));
  }
  // Unwrap str subclasses to the underlying string before writing.
  Str text(&scope, strUnderlying(*text_obj));
  return stringIOWrite(thread, stream, text);
}
// Builtin attribute table for `TextIOWrapper`; handed to addBuiltinType() in
// initializeUnderIOTypes(). Each entry pairs an attribute symbol with the
// offset of the RawTextIOWrapper field that backs it.
static const BuiltinAttribute kTextIOWrapperAttributes[] = {
{ID(_buffer), RawTextIOWrapper::kBufferOffset},
{ID(_line_buffering), RawTextIOWrapper::kLineBufferingOffset},
{ID(_encoding), RawTextIOWrapper::kEncodingOffset},
{ID(_errors), RawTextIOWrapper::kErrorsOffset},
{ID(_readuniversal), RawTextIOWrapper::kReaduniversalOffset},
{ID(_readtranslate), RawTextIOWrapper::kReadtranslateOffset},
{ID(_readnl), RawTextIOWrapper::kReadnlOffset},
{ID(_writetranslate), RawTextIOWrapper::kWritetranslateOffset},
{ID(_writenl), RawTextIOWrapper::kWritenlOffset},
{ID(_encoder), RawTextIOWrapper::kEncoderOffset},
{ID(_decoder), RawTextIOWrapper::kDecoderOffset},
{ID(_decoded_chars), RawTextIOWrapper::kDecodedCharsOffset},
{ID(_decoded_chars_used), RawTextIOWrapper::kDecodedCharsUsedOffset},
{ID(_snapshot), RawTextIOWrapper::kSnapshotOffset},
{ID(_seekable), RawTextIOWrapper::kSeekableOffset},
{ID(_has_read1), RawTextIOWrapper::kHasRead1Offset},
{ID(_b2cratio), RawTextIOWrapper::kB2cratioOffset},
{ID(_telling), RawTextIOWrapper::kTellingOffset},
{ID(mode), RawTextIOWrapper::kModeOffset}, // TODO(T54575279): remove
};
// Registers the builtin types of the `_io` module with the runtime.
// Every addBuiltinType() call supplies the type's name symbol, its layout id,
// the layout id of its superclass, its builtin attribute table
// (kNoAttributes when it has none), its instance size and the basetype flag.
// For _IOBase, BytesIO, BufferedRandom, BufferedReader, BufferedWriter,
// FileIO, TextIOWrapper and StringIO the returned type is additionally passed
// to builtinTypeEnableTupleOverflow().
// NOTE(review): each superclass is registered before the types that name it
// as `superclass_id`; presumably addBuiltinType() depends on that -- confirm
// before reordering any of these calls.
void initializeUnderIOTypes(Thread* thread) {
HandleScope scope(thread);
// Root of the hierarchy: _IOBase derives from object.
Type type(&scope, addBuiltinType(thread, ID(_IOBase), LayoutId::kUnderIOBase,
/*superclass_id=*/LayoutId::kObject,
kUnderIOBaseAttributes, UnderIOBase::kSize,
/*basetype=*/true));
builtinTypeEnableTupleOverflow(thread, type);
addBuiltinType(thread, ID(IncrementalNewlineDecoder),
LayoutId::kIncrementalNewlineDecoder,
/*superclass_id=*/LayoutId::kObject,
kIncrementalNewlineDecoderAttributes,
IncrementalNewlineDecoder::kSize, /*basetype=*/true);
// Direct subclasses of _IOBase.
addBuiltinType(thread, ID(_RawIOBase), LayoutId::kUnderRawIOBase,
/*superclass_id=*/LayoutId::kUnderIOBase, kNoAttributes,
UnderRawIOBase::kSize, /*basetype=*/true);
addBuiltinType(thread, ID(_BufferedIOBase), LayoutId::kUnderBufferedIOBase,
/*superclass_id=*/LayoutId::kUnderIOBase, kNoAttributes,
UnderBufferedIOBase::kSize, /*basetype=*/true);
// Subclasses of _BufferedIOBase.
type = addBuiltinType(thread, ID(BytesIO), LayoutId::kBytesIO,
/*superclass_id=*/LayoutId::kUnderBufferedIOBase,
kBytesIOAttributes, BytesIO::kSize, /*basetype=*/true);
builtinTypeEnableTupleOverflow(thread, type);
addBuiltinType(thread, ID(_BufferedIOMixin), LayoutId::kUnderBufferedIOMixin,
/*superclass_id=*/LayoutId::kUnderBufferedIOBase,
kUnderBufferedIOMixinAttributes, UnderBufferedIOMixin::kSize,
/*basetype=*/true);
// Subclasses of _BufferedIOMixin.
type = addBuiltinType(thread, ID(BufferedRandom), LayoutId::kBufferedRandom,
/*superclass_id=*/LayoutId::kUnderBufferedIOMixin,
kBufferedRandomAttributes, BufferedRandom::kSize,
/*basetype=*/true);
builtinTypeEnableTupleOverflow(thread, type);
type = addBuiltinType(thread, ID(BufferedReader), LayoutId::kBufferedReader,
/*superclass_id=*/LayoutId::kUnderBufferedIOMixin,
kBufferedReaderAttributes, BufferedReader::kSize,
/*basetype=*/true);
builtinTypeEnableTupleOverflow(thread, type);
type = addBuiltinType(thread, ID(BufferedWriter), LayoutId::kBufferedWriter,
/*superclass_id=*/LayoutId::kUnderBufferedIOMixin,
kBufferedWriterAttributes, BufferedWriter::kSize,
/*basetype=*/true);
builtinTypeEnableTupleOverflow(thread, type);
// Subclass of _RawIOBase.
type = addBuiltinType(thread, ID(FileIO), LayoutId::kFileIO,
/*superclass_id=*/LayoutId::kUnderRawIOBase,
kFileIOAttributes, FileIO::kSize, /*basetype=*/true);
builtinTypeEnableTupleOverflow(thread, type);
// Text layer: _TextIOBase and its subclasses.
addBuiltinType(thread, ID(_TextIOBase), LayoutId::kUnderTextIOBase,
/*superclass_id=*/LayoutId::kUnderIOBase, kNoAttributes,
RawUnderTextIOBase::kSize, /*basetype=*/true);
type = addBuiltinType(thread, ID(TextIOWrapper), LayoutId::kTextIOWrapper,
/*superclass_id=*/LayoutId::kUnderTextIOBase,
kTextIOWrapperAttributes, TextIOWrapper::kSize,
/*basetype=*/true);
builtinTypeEnableTupleOverflow(thread, type);
type =
addBuiltinType(thread, ID(StringIO), LayoutId::kStringIO,
/*superclass_id=*/LayoutId::kUnderTextIOBase,
kStringIOAttributes, StringIO::kSize, /*basetype=*/true);
builtinTypeEnableTupleOverflow(thread, type);
}
} // namespace py