in src/chug/wds/helpers.py [0:0]
def expand_urls(urls, weights=None):
    """Expand shard URL specs, optionally pairing every URL with a weight.

    Args:
        urls: Either a single string or an iterable of URLs. A string may
            hold several components separated by ``::``; each component is
            expanded with ``braceexpand.braceexpand``.
        weights: ``None``, or the weights that go with ``urls``. When
            ``urls`` is a string, this must be a ``::``-separated string with
            exactly one float-parsable entry per URL component. When ``urls``
            is not a string, ``weights`` is passed through untouched.

    Returns:
        * ``weights is None``: whatever ``wds.shardlists.expand_urls(urls)``
          returns -- note this is the bare expansion, NOT a tuple.
        * ``urls`` is a string: ``(all_urls, all_weights)``, two parallel
          lists in which every URL produced from a component carries that
          component's weight as a ``float``.
        * otherwise: ``(list(urls), weights)``.

    Raises:
        AssertionError: If the number of ``::``-separated URL components and
            weights differ.
        ValueError: If a weight entry cannot be converted with ``float``.
    """
    if weights is None:
        # No weights requested: defer entirely to webdataset's own expansion.
        return wds.shardlists.expand_urls(urls)

    if not isinstance(urls, str):
        # Already a collection of URLs; nothing to parse or expand here.
        return list(urls), weights

    url_specs = urls.split("::")
    weight_specs = weights.split("::")
    if len(weight_specs) != len(url_specs):
        # Raised explicitly (rather than via an `assert` statement) so the
        # check is not stripped under `python -O`, where a mismatch would
        # otherwise be silently truncated by zip() below. AssertionError is
        # kept so the exception type seen by callers does not change.
        raise AssertionError(
            f"Expected the number of data components ({len(url_specs)}) and weights({len(weight_specs)}) to match."
        )

    # Parse all weights up front so a malformed entry fails before expansion.
    component_weights = [float(spec) for spec in weight_specs]

    all_urls, all_weights = [], []
    for spec, weight in zip(url_specs, component_weights):
        shard_urls = list(braceexpand.braceexpand(spec))
        all_urls.extend(shard_urls)
        # Every URL expanded from one component shares that component's weight.
        all_weights.extend([weight] * len(shard_urls))
    return all_urls, all_weights