in prompts/khanacademy/khan_dl/khan_dl.py [0:0]
def get_course_all_slugs(self):
    """Generate slugs for all video lessons of every course unit.

    Each unit page is downloaded from ``ROOT_URL + course_unit_url`` and
    walked in this order:
    Unit Page -> Subunit Header + Subunit Block -> Lesson Block -> Lesson Title.
    Only lesson blocks whose label equals ``"Video"`` are recorded.

    Side effects:
        * ``self.lesson_titles`` receives the raw title text of each video.
        * ``self.course_all_slugs`` receives one path per video, shaped as
          ``<output_rel_path><unit_slug>/<subunit_idx>_<subunit_title>/
          <lesson_idx>_<lesson_title>`` with spaces in both titles replaced
          by underscores.
        * ``self.course_subunits`` receives one
          ``{"title": ..., "subunits": [{"title": ..., "lessons": [...]}]}``
          dict per unit.
        * ``self.unit_slugs_counter[course_unit_url]`` is set to the number
          of videos found in that unit.

    Exits the process with status 1 (after printing the error) when a unit
    page cannot be fetched or the server answers with an HTTP error status.
    """
    # Without an explicit timeout ``requests`` may wait forever on a stalled
    # connection, and the Timeout handler below could never fire.
    request_timeout = 30  # seconds

    # Unit Page -> Subunit Header + Subunit Block -> Lesson Block -> Lesson Title
    for course_unit_url, course_unit_slug, course_unit_title in zip(
        self.course_unit_urls, self.course_unit_slugs, self.course_unit_titles
    ):
        # -> Unit Page
        # Only the network call lives in the ``try`` so that parsing errors
        # are not mistaken for request failures.
        try:
            response = requests.get(
                ROOT_URL + course_unit_url, timeout=request_timeout
            )
            # ``requests.get`` does not raise on 4xx/5xx by itself; without
            # this call the HTTPError handler would be dead code and an
            # error page would be parsed as if it were a unit page.
            response.raise_for_status()
        except requests.exceptions.ConnectionError as errc:
            print("Error Connecting!\n", errc)
            sys.exit(1)
        except requests.exceptions.HTTPError as errh:
            print("Http Error:", errh)
            sys.exit(1)
        except requests.exceptions.Timeout as errt:
            print("Timeout Error:", errt)
            sys.exit(1)
        except requests.exceptions.RequestException as err:
            print("OOps: Something Else", err)
            sys.exit(1)

        course_unit_page = BeautifulSoup(response.text, "lxml")

        unit_lessons_counter = 0
        subunits = []

        # -> Subunit Header -> Subunit Block
        # Headers and bodies are paired positionally; ``zip`` stops at the
        # shorter of the two result lists.
        subunit_titles = course_unit_page.find_all(
            attrs=COURSE_SUBUNIT_TITLE_ATTRS
        )
        subunit_bodies = course_unit_page.find_all(
            COURSE_SUBUNIT_BODY["tag"], class_=COURSE_SUBUNIT_BODY["class"]
        )
        for subunit_counter, (course_subunit_title, course_subunit_body) in enumerate(
            zip(subunit_titles, subunit_bodies)
        ):
            logging.debug("course_subunit_title:%s", course_subunit_title)
            subunit_title_text = course_subunit_title.text

            # -> Lesson Block
            lessons = []
            for course_lesson_body in course_subunit_body.find_all(
                COURSE_LESSON_BODY["tag"],
                {
                    "class": [
                        COURSE_LESSON_BODY["class_i"],
                        COURSE_LESSON_BODY["class_ii"],
                    ]
                },
            ):
                course_lesson_span = course_lesson_body.find(
                    COURSE_LESSON_SPAN["tag"], class_=COURSE_LESSON_SPAN["class"]
                )
                if course_lesson_span is None:
                    # A block without the type span cannot be classified as
                    # a video; skip it instead of failing with IndexError.
                    logging.debug("lesson block without a type label skipped")
                    continue

                course_lesson_aria_label = course_lesson_span.get(
                    COURSE_LESSON_LABEL
                )
                logging.debug(
                    "course_lesson_aria_label:%s", course_lesson_aria_label
                )

                # -> Lesson Title
                # Check whether lesson block is a video
                if course_lesson_aria_label != "Video":
                    continue

                lesson_title = course_lesson_body.find(
                    COURSE_LESSON_TITLE["tag"],
                    class_=COURSE_LESSON_TITLE["class"],
                )
                if lesson_title is None:
                    logging.warning("video lesson block without a title skipped")
                    continue

                lesson_title_text = lesson_title.text
                logging.debug("course_lesson_title:%s", lesson_title_text)

                # Videos are numbered from 0 within their subunit; the index
                # is the count of videos recorded so far for this subunit.
                lesson_index = len(lessons)
                lessons.append(lesson_title_text.strip())
                self.lesson_titles.append(lesson_title_text)
                self.course_all_slugs.append(
                    "{}{}/{}_{}/{}_{}".format(
                        self.output_rel_path,
                        course_unit_slug,
                        subunit_counter,
                        subunit_title_text.replace(" ", "_"),
                        lesson_index,
                        lesson_title_text.replace(" ", "_"),
                    )
                )

            unit_lessons_counter += len(lessons)
            subunits.append(
                {
                    "title": subunit_title_text.strip(),
                    "lessons": lessons,
                }
            )

        self.course_subunits.append(
            {
                "title": course_unit_title,
                "subunits": subunits,
            }
        )
        self.unit_slugs_counter[course_unit_url] = unit_lessons_counter

    logging.info(len(self.course_all_slugs))
    logging.info("Course - All slugs generated")