in habanalabs/common/device.c [1323:1609]
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
char *name;
bool add_cdev_sysfs_on_err = false;
name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
if (!name) {
rc = -ENOMEM;
goto out_disabled;
}
/* Initialize cdev and device structures */
rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
&hdev->cdev, &hdev->dev);
kfree(name);
if (rc)
goto out_disabled;
name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
if (!name) {
rc = -ENOMEM;
goto free_dev;
}
/* Initialize cdev and device structures for control device */
rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
kfree(name);
if (rc)
goto free_dev;
/* Initialize ASIC function pointers and perform early init */
rc = device_early_init(hdev);
if (rc)
goto free_dev_ctrl;
user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;
if (user_interrupt_cnt) {
hdev->user_interrupt = kcalloc(user_interrupt_cnt,
sizeof(*hdev->user_interrupt),
GFP_KERNEL);
if (!hdev->user_interrupt) {
rc = -ENOMEM;
goto early_fini;
}
}
/*
* Start calling ASIC initialization. First S/W then H/W and finally
* late init
*/
rc = hdev->asic_funcs->sw_init(hdev);
if (rc)
goto user_interrupts_fini;
/* initialize completion structure for multi CS wait */
hl_multi_cs_completion_init(hdev);
/*
* Initialize the H/W queues. Must be done before hw_init, because
* there the addresses of the kernel queue are being written to the
* registers of the device
*/
rc = hl_hw_queues_create(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel queues\n");
goto sw_fini;
}
cq_cnt = hdev->asic_prop.completion_queues_count;
/*
* Initialize the completion queues. Must be done before hw_init,
* because there the addresses of the completion queues are being
* passed as arguments to request_irq
*/
if (cq_cnt) {
hdev->completion_queue = kcalloc(cq_cnt,
sizeof(*hdev->completion_queue),
GFP_KERNEL);
if (!hdev->completion_queue) {
dev_err(hdev->dev,
"failed to allocate completion queues\n");
rc = -ENOMEM;
goto hw_queues_destroy;
}
}
for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
rc = hl_cq_init(hdev, &hdev->completion_queue[i],
hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
if (rc) {
dev_err(hdev->dev,
"failed to initialize completion queue\n");
goto cq_fini;
}
hdev->completion_queue[i].cq_idx = i;
}
/*
* Initialize the event queue. Must be done before hw_init,
* because there the address of the event queue is being
* passed as argument to request_irq
*/
rc = hl_eq_init(hdev, &hdev->event_queue);
if (rc) {
dev_err(hdev->dev, "failed to initialize event queue\n");
goto cq_fini;
}
/* MMU S/W must be initialized before kernel context is created */
rc = hl_mmu_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
goto eq_fini;
}
/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
goto mmu_fini;
}
hdev->is_compute_ctx_active = false;
hdev->asic_funcs->state_dump_init(hdev);
hl_debugfs_add_device(hdev);
/* debugfs nodes are created in hl_ctx_init so it must be called after
* hl_debugfs_add_device.
*/
rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel context\n");
kfree(hdev->kernel_ctx);
goto remove_device_from_debugfs;
}
rc = hl_cb_pool_init(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize CB pool\n");
goto release_ctx;
}
/*
* From this point, override rc (=0) in case of an error to allow
* debugging (by adding char devices and create sysfs nodes as part of
* the error flow).
*/
add_cdev_sysfs_on_err = true;
/* Device is now enabled as part of the initialization requires
* communication with the device firmware to get information that
* is required for the initialization itself
*/
hdev->disabled = false;
rc = hdev->asic_funcs->hw_init(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize the H/W\n");
rc = 0;
goto out_disabled;
}
/* Check that the communication with the device is working */
rc = hdev->asic_funcs->test_queues(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to detect if device is alive\n");
rc = 0;
goto out_disabled;
}
rc = device_late_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed late initialization\n");
rc = 0;
goto out_disabled;
}
dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
hdev->asic_name,
hdev->asic_prop.dram_size / SZ_1G);
rc = hl_vm_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize memory module\n");
rc = 0;
goto out_disabled;
}
/*
* Expose devices and sysfs nodes to user.
* From here there is no need to add char devices and create sysfs nodes
* in case of an error.
*/
add_cdev_sysfs_on_err = false;
rc = device_cdev_sysfs_add(hdev);
if (rc) {
dev_err(hdev->dev,
"Failed to add char devices and sysfs nodes\n");
rc = 0;
goto out_disabled;
}
/* Need to call this again because the max power might change,
* depending on card type for certain ASICs
*/
hl_set_max_power(hdev);
/*
* hl_hwmon_init() must be called after device_late_init(), because only
* there we get the information from the device about which
* hwmon-related sensors the device supports.
* Furthermore, it must be done after adding the device to the system.
*/
rc = hl_hwmon_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize hwmon\n");
rc = 0;
goto out_disabled;
}
dev_notice(hdev->dev,
"Successfully added device to habanalabs driver\n");
hdev->init_done = true;
/* After initialization is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events and if
* those events are fatal, we won't know about it and the device will
* be operational although it shouldn't be
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
return 0;
release_ctx:
if (hl_ctx_put(hdev->kernel_ctx) != 1)
dev_err(hdev->dev,
"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
hl_debugfs_remove_device(hdev);
mmu_fini:
hl_mmu_fini(hdev);
eq_fini:
hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
for (i = 0 ; i < cq_ready_cnt ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
hw_queues_destroy:
hl_hw_queues_destroy(hdev);
sw_fini:
hdev->asic_funcs->sw_fini(hdev);
user_interrupts_fini:
kfree(hdev->user_interrupt);
early_fini:
device_early_fini(hdev);
free_dev_ctrl:
put_device(hdev->dev_ctrl);
free_dev:
put_device(hdev->dev);
out_disabled:
hdev->disabled = true;
if (add_cdev_sysfs_on_err)
device_cdev_sysfs_add(hdev);
if (hdev->pdev)
dev_err(&hdev->pdev->dev,
"Failed to initialize hl%d. Device is NOT usable !\n",
hdev->id / 2);
else
pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
hdev->id / 2);
return rc;
}