in oss2/api.py [0:0]
def select_object(self, key, sql,
                  progress_callback=None,
                  select_params=None,
                  byte_range=None,
                  headers=None
                  ):
    """Run a SQL select over the content of one object.

    Supported inputs are CSV, JSON Document and JSON Lines files, as well
    as their GZIP-compressed variants.

    Usage ::

        For CSV:
        >>> result = bucket.select_object('access.log', 'select * from ossobject where _4 > 40')
        >>> print(result.read())
        'hello world'

        For a JSON document: { contacts:[{"firstName":"abc", "lastName":"def"},{"firstName":"abc1", "lastName":"def1"}]}
        >>> result = bucket.select_object('sample.json', 'select s.firstName, s.lastName from ossobject.contacts[*] s', select_params = {"Json_Type":"DOCUMENT"})

        For JSON lines: {"firstName":"abc", "lastName":"def"},{"firstName":"abc1", "lastName":"def1"}
        >>> result = bucket.select_object('sample.json', 'select s.firstName, s.lastName from ossobject s', select_params = {"Json_Type":"LINES"})

    :param key: object name
    :param sql: SQL statement
    :param select_params: collection of select parameters. For JSON files
        the Json_Type entry must be specified. See :ref:`select_params`
    :param progress_callback: user-supplied progress callback.
        See :ref:`progress_callback`
    :param byte_range: restrict the select to a specific range of the
        object. When it yields a non-empty range string, that string is
        sent as the ``range`` header, and ``select_params`` must then
        contain ``AllowQuotedRecordDelimiter`` set to false.
    :param headers: HTTP headers
    :type headers: may be a dict; oss2.CaseInsensitiveDict is recommended

    :return: file-like object

    :raises: :class:`ClientError` if a range is requested without
        ``AllowQuotedRecordDelimiter`` being false in ``select_params``;
        :class:`NoSuchKey <oss2.exceptions.NoSuchKey>` if the object does
        not exist; other exceptions may be raised as well
    """
    headers = http.CaseInsensitiveDict(headers)

    range_string = _make_range_string(byte_range)
    if range_string:
        headers['range'] = range_string

        # A ranged select is only accepted when the caller has explicitly
        # switched AllowQuotedRecordDelimiter off (any spelling of "false").
        quoted_delimiter_disabled = (
            select_params is not None
            and SelectParameters.AllowQuotedRecordDelimiter in select_params
            and str(select_params[SelectParameters.AllowQuotedRecordDelimiter]).lower() == 'false'
        )
        if not quoted_delimiter_disabled:
            raise ClientError('"AllowQuotedRecordDelimiter" must be specified in select_params as False when "Range" is specified in header.')

    body = xml_utils.to_select_object(sql, select_params)

    # The presence of Json_Type (whatever its value) switches the request
    # from the CSV processor to the JSON processor.
    is_json = select_params is not None and SelectParameters.Json_Type in select_params
    params = {'x-oss-process': 'json/select' if is_json else 'csv/select'}

    # NOTE(review): this overwrites the instance-wide timeout and is never
    # restored, so it presumably also applies to later requests made through
    # this object - confirm that this is intended.
    self.timeout = 3600
    resp = self.__do_object('POST', key, data=body, headers=headers, params=params)

    # Payload CRC checking is opt-in: EnablePayloadCrc must be present and
    # spell "true" (case-insensitively).
    crc_enabled = (
        select_params is not None
        and SelectParameters.EnablePayloadCrc in select_params
        and str(select_params[SelectParameters.EnablePayloadCrc]).lower() == "true"
    )

    return SelectObjectResult(resp, progress_callback, crc_enabled)