DST/dataclass.py [230:275]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        special_tokens = ontology.sos_eos_tokens
        print (special_tokens)
        #print (special_tokens)
        self.tokenizer.add_tokens(special_tokens)
        return special_tokens

    def flatten_data(self, data):
        '''
            transform session data into turn data
            each session in data has length of (number of turns)
            each turn has the following keys: 
                dial_id: data id
                user: user input at this turn; 
                      e.g. '<sos_u> i am looking for an expensive restaurant in the centre . thank you . <eos_u>'
                usdx: delexicalized user input at this turn; 
                      e.g. '<sos_u> i am looking for an expensive restaurant in the centre . thank you . <eos_u>'
                resp: delexicalized system response; 
                      e.g. '<sos_r> there are several restaurant -s in the price range what type of food would you like to eat ? <eos_r>'
                bspn: belief state span;
                      e.g. '<sos_b> [restaurant] pricerange expensive area centre <eos_b>'
                bsdx: delexicalized belief state span;
                      e.g. '<sos_b> [restaurant] pricerange area <eos_b>'
                aspn: action span;
                      e.g. '<sos_a> [restaurant] [request] food <eos_a>'
                dspn: domain span;
                      e.g. '<sos_d> [restaurant] <eos_d>'
                pointer: e.g. [0, 0, 0, 1, 0, 0]
                turn_domain: e.g. ['[restaurant]']
                turn_num: the turn number in current session
                db: database result e.g. '<sos_db> [db_3] <eos_db>'
                bspn_reform: reformed belief state;
                      e.g. '<sos_b> [restaurant] pricerange = expensive , area = centre <eos_b>'
                bsdx_reform: reformed delexicalized belief state;
                      e.g. '<sos_b> [restaurant] pricerange , area <eos_b>'
                aspn_reform: reformed dialogue action;
                      e.g. '<sos_a> [restaurant] [request] food <eos_a>'
        '''
        data_list = []
        for session in data:
            one_dial_id = session[0]['dial_id']
            turn_num = len(session)
            previous_context = [] # previous context contains all previous user input and system response
            for turn_id in range(turn_num):
                curr_turn = session[turn_id]
                assert curr_turn['turn_num'] == turn_id # the turns should be arranged in order
                curr_user_input = curr_turn['user']
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



E2E_TOD/dataclass.py [247:292]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        special_tokens = ontology.sos_eos_tokens
        print (special_tokens)
        #print (special_tokens)
        self.tokenizer.add_tokens(special_tokens)
        return special_tokens

    def flatten_data(self, data):
        '''
            transform session data into turn data
            each session in data has length of (number of turns)
            each turn has the following keys: 
                dial_id: data id
                user: user input at this turn; 
                      e.g. '<sos_u> i am looking for an expensive restaurant in the centre . thank you . <eos_u>'
                usdx: delexicalized user input at this turn; 
                      e.g. '<sos_u> i am looking for an expensive restaurant in the centre . thank you . <eos_u>'
                resp: delexicalized system response; 
                      e.g. '<sos_r> there are several restaurant -s in the price range what type of food would you like to eat ? <eos_r>'
                bspn: belief state span;
                      e.g. '<sos_b> [restaurant] pricerange expensive area centre <eos_b>'
                bsdx: delexicalized belief state span;
                      e.g. '<sos_b> [restaurant] pricerange area <eos_b>'
                aspn: action span;
                      e.g. '<sos_a> [restaurant] [request] food <eos_a>'
                dspn: domain span;
                      e.g. '<sos_d> [restaurant] <eos_d>'
                pointer: e.g. [0, 0, 0, 1, 0, 0]
                turn_domain: e.g. ['[restaurant]']
                turn_num: the turn number in current session
                db: database result e.g. '<sos_db> [db_3] <eos_db>'
                bspn_reform: reformed belief state;
                      e.g. '<sos_b> [restaurant] pricerange = expensive , area = centre <eos_b>'
                bsdx_reform: reformed delexicalized belief state;
                      e.g. '<sos_b> [restaurant] pricerange , area <eos_b>'
                aspn_reform: reformed dialogue action;
                      e.g. '<sos_a> [restaurant] [request] food <eos_a>'
        '''
        data_list = []
        for session in data:
            one_dial_id = session[0]['dial_id']
            turn_num = len(session)
            previous_context = [] # previous context contains all previous user input and system response
            for turn_id in range(turn_num):
                curr_turn = session[turn_id]
                assert curr_turn['turn_num'] == turn_id # the turns should be arranged in order
                curr_user_input = curr_turn['user']
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



