in Example-Project/utils.py [0:0]
def write_rows(rows, data, output_file, bucket, path):
    '''
    Append one delimited sample to output_file, then copy the file to S3.

    The sample is wrapped in '<|startoftext|>' and '<|endoftext|>' markers.
    Each entry of rows is a label whose text before the first '_' is an
    integer index into data. The segment belonging to a row runs from its
    own index up to (not including) the index of the following row; the
    last row runs to the end of data.

    rows:        sequence of label strings; a label containing 'markdown'
                 is rendered with get_markdown, otherwise one containing
                 'code' is rendered with get_code, anything else is skipped
    data:        sliceable object; slices of it are handed to the helpers
                 (presumably notebook cells -- confirm against the caller)
    output_file: local file path, opened in append mode
    bucket:      S3 bucket name
    path:        key prefix inside the bucket

    Returns None. The exit status of the upload command is not checked.
    '''
    # Function-scope import keeps this block self-contained.
    import subprocess

    with open(output_file, 'a') as f:
        f.write('<|startoftext|>')
        for idx, r in enumerate(rows):
            # The next row's index marks where this segment ends;
            # None makes the slice run to the end of data.
            if idx < len(rows) - 1:
                end_of_snippet = int(rows[idx + 1].split('_')[0])
            else:
                end_of_snippet = None

            if 'markdown' in r:
                m_idx = int(r.split('_')[0])
                markdown_content = get_markdown(data[m_idx:end_of_snippet])
                f.write(markdown_content)
                f.write('\n')
            elif 'code' in r:
                c_idx = int(r.split('_')[0])
                code_content = get_code(data[c_idx:end_of_snippet])
                f.write('\n')
                f.write(code_content)
        f.write('<|endoftext|>')

    # Upload only after the with-block has flushed and closed the file.
    # Passing an argument list (no shell) keeps file names, bucket names and
    # prefixes containing spaces or shell metacharacters from being split or
    # interpreted. check=False keeps the previous "ignore exit status"
    # behaviour.
    # NOTE(review): unlike os.system, a missing `aws` executable now raises
    # FileNotFoundError instead of being silently ignored.
    destination = 's3://{}/{}/'.format(bucket, path)
    subprocess.run(['aws', 's3', 'cp', output_file, destination], check=False)