def write_rows()

in Example-Project/utils.py [0:0]


def write_rows(rows, data, output_file, bucket, path):
    '''
    Append one delimited text sample to output_file, then copy it to S3.

    The sample is wrapped in '<|startoftext|>' / '<|endoftext|>' markers.
    Each entry of rows is a label string: the text before its first '_'
    is parsed as an integer offset into data, and the label is expected
    to contain either 'markdown' or 'code'. The segment belonging to a
    label runs from its own offset up to the offset of the next label
    (or to the end of data for the last label). Markdown segments are
    rendered with get_markdown() and followed by a newline; code segments
    are rendered with get_code() and preceded by a newline. Labels that
    contain neither word write nothing.

    Args:
        rows: sequence of label strings as described above.
        data: sliceable object; slices of it are handed to
            get_markdown() / get_code().
        output_file: local file path, opened in append mode.
        bucket: S3 bucket name used in the upload destination.
        path: prefix inside the bucket; the destination is
            's3://<bucket>/<path>/'.

    The exit status of the upload is not checked.
    '''
    # Imported locally so the function does not depend on the module's
    # import block.
    import subprocess
    import warnings

    with open(output_file, 'a') as f:

        f.write('<|startoftext|>')

        last = len(rows) - 1
        for idx, r in enumerate(rows):

            # A segment ends where the next label starts; None makes the
            # slice for the last label run to the end of data.
            if idx < last:
                end_of_snippet = int(rows[idx + 1].split('_')[0])
            else:
                end_of_snippet = None

            if 'markdown' in r:
                start = int(r.split('_')[0])
                markdown_content = get_markdown(data[start:end_of_snippet])

                f.write(markdown_content)
                f.write('\n')

            elif 'code' in r:
                start = int(r.split('_')[0])
                code_content = get_code(data[start:end_of_snippet])

                f.write('\n')
                f.write(code_content)

        f.write('<|endoftext|>')

    # Pass an argument list instead of a formatted shell string so that
    # spaces or shell metacharacters in the file name, bucket or path
    # cannot split or alter the command.
    destination = 's3://{}/{}/'.format(bucket, path)
    try:
        subprocess.run(['aws', 's3', 'cp', output_file, destination])
    except OSError as err:
        # Raised when the aws executable cannot be started; keep the
        # upload non-fatal, as it was with os.system.
        warnings.warn('could not run "aws s3 cp": {}'.format(err))