in utils_nlp/models/glove/src/glove.c [178:290]
int save_params(int nb_iter) {
/*
* nb_iter is the number of iteration (= a full pass through the cooccurrence matrix).
* nb_iter > 0 => checkpointing the intermediate parameters, so nb_iter is in the filename of output file.
* else => saving the final paramters, so nb_iter is ignored.
*/
long long a, b;
char format[20];
char output_file[MAX_STRING_LENGTH], output_file_gsq[MAX_STRING_LENGTH];
char *word = malloc(sizeof(char) * MAX_STRING_LENGTH + 1);
FILE *fid, *fout, *fgs;
if (use_binary > 0) { // Save parameters in binary file
if (nb_iter <= 0)
sprintf(output_file,"%s.bin",save_W_file);
else
sprintf(output_file,"%s.%03d.bin",save_W_file,nb_iter);
fout = fopen(output_file,"wb");
if (fout == NULL) {fprintf(stderr, "Unable to open file %s.\n",save_W_file); return 1;}
for (a = 0; a < 2 * (long long)vocab_size * (vector_size + 1); a++) fwrite(&W[a], sizeof(real), 1,fout);
fclose(fout);
if (save_gradsq > 0) {
if (nb_iter <= 0)
sprintf(output_file_gsq,"%s.bin",save_gradsq_file);
else
sprintf(output_file_gsq,"%s.%03d.bin",save_gradsq_file,nb_iter);
fgs = fopen(output_file_gsq,"wb");
if (fgs == NULL) {fprintf(stderr, "Unable to open file %s.\n",save_gradsq_file); return 1;}
for (a = 0; a < 2 * (long long)vocab_size * (vector_size + 1); a++) fwrite(&gradsq[a], sizeof(real), 1,fgs);
fclose(fgs);
}
}
if (use_binary != 1) { // Save parameters in text file
if (nb_iter <= 0)
sprintf(output_file,"%s.txt",save_W_file);
else
sprintf(output_file,"%s.%03d.txt",save_W_file,nb_iter);
if (save_gradsq > 0) {
if (nb_iter <= 0)
sprintf(output_file_gsq,"%s.txt",save_gradsq_file);
else
sprintf(output_file_gsq,"%s.%03d.txt",save_gradsq_file,nb_iter);
fgs = fopen(output_file_gsq,"wb");
if (fgs == NULL) {fprintf(stderr, "Unable to open file %s.\n",save_gradsq_file); return 1;}
}
fout = fopen(output_file,"wb");
if (fout == NULL) {fprintf(stderr, "Unable to open file %s.\n",save_W_file); return 1;}
fid = fopen(vocab_file, "r");
sprintf(format,"%%%ds",MAX_STRING_LENGTH);
if (fid == NULL) {fprintf(stderr, "Unable to open file %s.\n",vocab_file); return 1;}
if (write_header) fprintf(fout, "%lld %d\n", vocab_size, vector_size);
for (a = 0; a < vocab_size; a++) {
if (fscanf(fid,format,word) == 0) return 1;
// input vocab cannot contain special <unk> keyword
if (strcmp(word, "<unk>") == 0) return 1;
fprintf(fout, "%s",word);
if (model == 0) { // Save all parameters (including bias)
for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", W[a * (vector_size + 1) + b]);
for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", W[(vocab_size + a) * (vector_size + 1) + b]);
}
if (model == 1) // Save only "word" vectors (without bias)
for (b = 0; b < vector_size; b++) fprintf(fout," %lf", W[a * (vector_size + 1) + b]);
if (model == 2) // Save "word + context word" vectors (without bias)
for (b = 0; b < vector_size; b++) fprintf(fout," %lf", W[a * (vector_size + 1) + b] + W[(vocab_size + a) * (vector_size + 1) + b]);
fprintf(fout,"\n");
if (save_gradsq > 0) { // Save gradsq
fprintf(fgs, "%s",word);
for (b = 0; b < (vector_size + 1); b++) fprintf(fgs," %lf", gradsq[a * (vector_size + 1) + b]);
for (b = 0; b < (vector_size + 1); b++) fprintf(fgs," %lf", gradsq[(vocab_size + a) * (vector_size + 1) + b]);
fprintf(fgs,"\n");
}
if (fscanf(fid,format,word) == 0) return 1; // Eat irrelevant frequency entry
}
if (use_unk_vec) {
real* unk_vec = (real*)calloc((vector_size + 1), sizeof(real));
real* unk_context = (real*)calloc((vector_size + 1), sizeof(real));
word = "<unk>";
int num_rare_words = vocab_size < 100 ? vocab_size : 100;
for (a = vocab_size - num_rare_words; a < vocab_size; a++) {
for (b = 0; b < (vector_size + 1); b++) {
unk_vec[b] += W[a * (vector_size + 1) + b] / num_rare_words;
unk_context[b] += W[(vocab_size + a) * (vector_size + 1) + b] / num_rare_words;
}
}
fprintf(fout, "%s",word);
if (model == 0) { // Save all parameters (including bias)
for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", unk_vec[b]);
for (b = 0; b < (vector_size + 1); b++) fprintf(fout," %lf", unk_context[b]);
}
if (model == 1) // Save only "word" vectors (without bias)
for (b = 0; b < vector_size; b++) fprintf(fout," %lf", unk_vec[b]);
if (model == 2) // Save "word + context word" vectors (without bias)
for (b = 0; b < vector_size; b++) fprintf(fout," %lf", unk_vec[b] + unk_context[b]);
fprintf(fout,"\n");
free(unk_vec);
free(unk_context);
}
fclose(fid);
fclose(fout);
if (save_gradsq > 0) fclose(fgs);
}
return 0;
}