def get_memory_events()

in tb_plugin/torch_tb_profiler/run.py [0:0]


    def get_memory_events(
            p: Union["RunProfile", RunProfileData],
            start_ts=None,
            end_ts=None,
            time_metric: str = "ms",
            memory_metric: str = "K",
        ):
        """Build the "Memory Events" table from the memory records of ``p``.

        The records are walked in order. An allocation is remembered by its
        address; a later release at the same address is paired with it and
        emitted as one row. Allocations that are never released, and releases
        that have no pending allocation, are emitted as rows with the missing
        cells set to ``None``.

        Args:
            p: Profile object providing ``profiler_start_ts`` and
                ``memory_parser.all_records``.
            start_ts: Forwarded to ``RunProfile._filtered_by_ts``
                (presumably the lower bound of a time window -- confirm there).
            end_ts: Forwarded to ``RunProfile._filtered_by_ts``
                (presumably the upper bound of a time window -- confirm there).
            time_metric: Unit handed to ``Canonicalizer`` for time values; the
                canonicalizer's ``time_metric`` is shown in the column titles.
            memory_metric: Unit handed to ``Canonicalizer`` for sizes; the
                canonicalizer's ``memory_metric`` is shown in the column titles.

        Returns:
            A dict with three keys:

            * ``"metadata"``: title and ``default_device`` -- the first device
              name (in sorted order) starting with ``"GPU"``, else ``"CPU"``.
            * ``"columns"``: descriptors of the five columns.
            * ``"rows"``: mapping of device name to a list of rows
              ``[operator, size, allocation_time, release_time, duration]``.
              Times are relative to ``p.profiler_start_ts``.

        Raises:
            AssertionError: If record timestamps decrease, or if an address
                that already has an unmatched release is released again
                without a pending allocation.
        """
        def get_op_name_or_ctx(record: MemoryRecord):
            name = record.op_name_or_unknown
            if name.startswith("aten::empty") and record.parent_op_name:
                # aten::empty can be treated as the "malloc" in pytorch
                name = f"{record.parent_op_name} ({name})"
            return name

        cano = Canonicalizer(time_metric=time_metric, memory_metric=memory_metric)
        # Named ``rounder`` so the builtin ``round`` is not shadowed.
        rounder = DisplayRounder(ndigits=2)

        profiler_start_ts = p.profiler_start_ts
        memory_records = RunProfile._filtered_by_ts(p.memory_parser.all_records, start_ts, end_ts)

        def relative_time(ts):
            # Display time measured from the start of profiling.
            return rounder(cano.convert_time(ts - profiler_start_ts))

        events = defaultdict(list)
        alloc = {}  # addr -> allocation record that may or may not get a paired free
        free = {}  # addr -> free record that does not have a paired allocation
        prev_ts = float("-inf")  # used to verify the records are time-ordered
        for r in memory_records:
            if r.addr is None:
                # profile json data prior to pytorch 1.10 do not have addr
                # we should ignore them
                continue
            # Several records may legitimately share one timestamp, so only a
            # *decreasing* timestamp means the records are out of order.
            assert prev_ts <= r.ts, "memory records are not ordered by timestamp"
            prev_ts = r.ts
            addr = r.addr
            if r.is_allocation:
                # to be matched with a release event
                alloc[addr] = r
                continue

            alloc_r = alloc.pop(addr, None)
            if alloc_r is None:
                assert addr not in free, "address released twice without an allocation"
                free[addr] = r
                continue

            # The size is taken from the release record and negated for display.
            events[alloc_r.device_name].append([
                get_op_name_or_ctx(alloc_r),
                rounder(cano.convert_memory(-r.bytes)),
                relative_time(alloc_r.ts),
                relative_time(r.ts),
                rounder(cano.convert_time(r.ts - alloc_r.ts)),
            ])

        # Allocations still pending at the end: no release time, no duration.
        for r in alloc.values():
            events[r.device_name].append([
                get_op_name_or_ctx(r),
                rounder(cano.convert_memory(r.bytes)),
                relative_time(r.ts),
                None,
                None,
            ])

        # Releases with no matching allocation: no allocation time, no duration.
        for r in free.values():
            events[r.device_name].append([
                get_op_name_or_ctx(r),
                rounder(cano.convert_memory(-r.bytes)),
                None,
                relative_time(r.ts),
                None,
            ])

        # Prefer the first GPU device (in sorted order) when one has events.
        default_device = next(
            (dev_name for dev_name in sorted(events) if dev_name.startswith("GPU")),
            "CPU",
        )

        return {
            "metadata": {
                "title": "Memory Events",
                "default_device": default_device,
            },
            "columns": [
                {"name": "Operator", "type": "string", "tooltip": ""},
                {"name": f"Size ({cano.memory_metric})", "type": "number", "tooltip": ""},
                {"name": f"Allocation Time ({cano.time_metric})", "type": "number", "tooltip": ""},
                {"name": f"Release Time ({cano.time_metric})", "type": "number", "tooltip": ""},
                {"name": f"Duration ({cano.time_metric})", "type": "number", "tooltip": ""},
            ],
            "rows": events,  # in the form of { "CPU": [...], "GPU0": [...], ... }
        }