in build_and_train_models/sm-introduction_to_blazingtext_word2vec_text8/validator.py [0:0]
def validate_hyperparameters(cfg):
    """Validate the hyperparameters present in ``cfg`` and count warnings.

    Only keys that are present in ``cfg`` are checked; absent keys are
    skipped. Numeric values are converted with ``get_int`` / ``get_float``,
    which are defined outside this function (presumably they raise on
    unparsable input -- confirm against their definitions).

    Args:
        cfg: Mapping from hyperparameter name to its raw value.

    Returns:
        The number of non-fatal warnings that were printed.

    Raises:
        Exception: If a present hyperparameter is outside its allowed range
            or, for ``mode``, is not one of the accepted values.
    """
    warnings = 0

    if "mode" in cfg:
        mode = cfg["mode"]
        if mode not in ("skipgram", "cbow", "batch_skipgram"):
            raise Exception('mode should be one of ["skipgram", "cbow", "batch_skipgram"]')

    if "min_count" in cfg:
        min_count = get_int(cfg["min_count"], "min_count")
        if min_count < 0:
            raise Exception("Parameter 'min_count' should be >= 0.")

    if "sampling_threshold" in cfg:
        threshold = get_float(cfg["sampling_threshold"], "sampling_threshold")
        # Both endpoints are excluded: the value must lie strictly inside (0, 1).
        if threshold <= 0 or threshold >= 1:
            raise Exception("Parameter 'sampling_threshold' should be between (0,1)")

    if "learning_rate" in cfg:  # Default: .05
        learning_rate = get_float(cfg["learning_rate"], "learning_rate")
        if learning_rate <= 0:
            raise Exception("Parameter 'learning_rate' should be > 0.")

    # Kept outside the branch because the batch_size recommendation below
    # needs a window size even when 'window_size' is not supplied.
    ws = 5
    if "window_size" in cfg:  # Default: 5
        ws = get_int(cfg["window_size"], "window_size")
        if ws <= 0:
            raise Exception("Parameter 'window_size' should be > 0.")

    if "vector_dim" in cfg:  # Default: 100
        vector_dim = get_int(cfg["vector_dim"], "vector_dim")
        if vector_dim <= 0:
            raise Exception("Parameter 'vector_dim' should be > 0.")
        if vector_dim > 2048:
            raise Exception("Parameter 'vector_dim' should be <= 2048.")
        # Large but still legal dimensions only produce a warning.
        if vector_dim >= 1500:
            warnings += 1
            print(
                "You are using a big vector dimension. Training might take a long time or might fail due to memory "
                "issues."
            )

    if "epochs" in cfg:  # Default: 5
        epochs = get_int(cfg["epochs"], "epochs")
        if epochs <= 0:
            raise Exception("Parameter 'epochs' should be > 0.")

    if "negative_samples" in cfg:  # Default: 5
        negative_samples = get_int(cfg["negative_samples"], "negative_samples")
        if negative_samples <= 0:
            raise Exception("Parameter 'negative_samples' should be > 0.")

    if "batch_size" in cfg:  # Default: 11
        batch_size = get_int(cfg["batch_size"], "batch_size")
        if batch_size <= 0:
            raise Exception("Parameter 'batch_size' should be > 0.")
        if batch_size > 32:
            raise Exception("Parameter 'batch_size' should be <= 32.")
        recommended = 2 * ws + 1
        # Compare by value. The previous `is not` tested object identity,
        # which is only reliable for integers the interpreter happens to
        # cache and is wrong for any other numeric object.
        if batch_size != recommended:
            warnings += 1
            print(
                "It is recommended that you set batch_size as 2*window_size + 1 which is %s in this case."
                % recommended
            )

    return warnings