benchmarks/benchmarks/bench_pickle.py (235 lines of code) (raw):

#!/usr/bin/env python3 """Script for testing the performance of pickling/unpickling. This will pickle/unpickle several real world-representative objects a few thousand times. The methodology below was chosen for was chosen to be similar to real-world scenarios which operate on single objects at a time. Note that if we did something like pickle.dumps([dict(some_dict) for _ in range(10000)]) this isn't equivalent to dumping the dict 10000 times: pickle uses a highly-efficient encoding for the n-1 following copies. """ import sys # use pure python sys.modules["_pickle"] = None import pickle if not getattr(pickle.Pickler, "__module__", "<jython>") == "pickle": raise RuntimeError("Unexpected C accelerators for pickle") import argparse import datetime import random __author__ = "collinwinter@google.com (Collin Winter)" DEFAULT_LOOPS_PICKLE = 8 DEFAULT_LOOPS_PICKLE_LIST = 32 DEFAULT_LOOPS_PICKLE_DICT = 8 DICT = { "ads_flags": 0, "age": 18, "birthday": datetime.date(1980, 5, 7), "bulletin_count": 0, "comment_count": 0, "country": "BR", "encrypted_id": "G9urXXAJwjE", "favorite_count": 9, "first_name": "", "flags": 412317970704, "friend_count": 0, "gender": "m", "gender_for_display": "Male", "id": 302935349, "is_custom_profile_icon": 0, "last_name": "", "locale_preference": "pt_BR", "member": 0, "tags": ["a", "b", "c", "d", "e", "f", "g"], "profile_foo_id": 827119638, "secure_encrypted_id": "Z_xxx2dYx3t4YAdnmfgyKw", "session_number": 2, "signup_id": "201-19225-223", "status": "A", "theme": 1, "time_created": 1225237014, "time_updated": 1233134493, "unread_message_count": 0, "user_group": "0", "username": "collinwinter", "play_count": 9, "view_count": 7, "zip": "", } TUPLE = ( [ 265867233, 265868503, 265252341, 265243910, 265879514, 266219766, 266021701, 265843726, 265592821, 265246784, 265853180, 45526486, 265463699, 265848143, 265863062, 265392591, 265877490, 265823665, 265828884, 265753032, ], 60, ) def mutate_dict(orig_dict, random_source): new_dict = dict(orig_dict) for key, _value in new_dict.items(): rand_val = random_source.random() * sys.maxsize if isinstance(key, (int, bytes, str)): new_dict[key] = type(key)(rand_val) return new_dict random_source = random.Random(5) # Fixed seed. DICT_GROUP = [mutate_dict(DICT, random_source) for _ in range(3)] def bench_pickle(loops, pickle, protocol): range_it = range(loops) # micro-optimization: use fast local variables dumps = pickle.dumps objs = (DICT, TUPLE, DICT_GROUP) for _ in range_it: for obj in objs: # 20 dumps dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) def bench_unpickle(loops, pickle, protocol): pickled_dict = pickle.dumps(DICT, protocol) pickled_tuple = pickle.dumps(TUPLE, protocol) pickled_dict_group = pickle.dumps(DICT_GROUP, protocol) range_it = range(loops) # micro-optimization: use fast local variables loads = pickle.loads objs = (pickled_dict, pickled_tuple, pickled_dict_group) for _ in range_it: for obj in objs: # 20 loads dict loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) loads(obj) LIST = [[list(range(10)), list(range(10))] for _ in range(10)] def bench_pickle_list(loops, pickle, protocol): range_it = range(loops) # micro-optimization: use fast local variables dumps = pickle.dumps obj = LIST for _ in range_it: # 10 dumps list dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) dumps(obj, protocol) def bench_unpickle_list(loops, pickle, protocol): pickled_list = pickle.dumps(LIST, protocol) range_it = range(loops) # micro-optimization: use fast local variables loads = pickle.loads for _ in range_it: # 10 loads list loads(pickled_list) loads(pickled_list) loads(pickled_list) loads(pickled_list) loads(pickled_list) loads(pickled_list) loads(pickled_list) loads(pickled_list) loads(pickled_list) loads(pickled_list) MICRO_DICT = {key: dict.fromkeys(range(10)) for key in range(100)} def bench_pickle_dict(loops, pickle, protocol): range_it = range(loops) # micro-optimization: use fast local variables obj = MICRO_DICT for _ in range_it: # 5 dumps dict pickle.dumps(obj, protocol) pickle.dumps(obj, protocol) pickle.dumps(obj, protocol) pickle.dumps(obj, protocol) pickle.dumps(obj, protocol) BENCHMARKS = { # 20 inner-loops: don't count the 3 pickled objects "pickle": (bench_pickle, 20), # 20 inner-loops: don't count the 3 unpickled objects "unpickle": (bench_unpickle, 20), "pickle_list": (bench_pickle_list, 10), "unpickle_list": (bench_unpickle_list, 10), "pickle_dict": (bench_pickle_dict, 5), } def run(): # use pure python sys.modules["_pickle"] = None import pickle if not getattr(pickle.Pickler, "__module__", "<jython>") == "pickle": raise RuntimeError("Unexpected C accelerators for pickle") protocol = pickle.HIGHEST_PROTOCOL bench_pickle(DEFAULT_LOOPS_PICKLE, pickle, protocol) bench_unpickle(DEFAULT_LOOPS_PICKLE, pickle, protocol) bench_pickle_list(DEFAULT_LOOPS_PICKLE_LIST, pickle, protocol) bench_unpickle_list(DEFAULT_LOOPS_PICKLE_LIST, pickle, protocol) bench_pickle_dict(DEFAULT_LOOPS_PICKLE_DICT, pickle, protocol) def warmup(pickle, protocol): bench_pickle(1, pickle, protocol) bench_unpickle(1, pickle, protocol) bench_pickle_list(1, pickle, protocol) bench_unpickle_list(1, pickle, protocol) bench_pickle_dict(1, pickle, protocol) def jit(pickle): try: from _builtins import _jit_fromlist _jit_fromlist( [ pickle.dumps, pickle.loads, ] ) except ImportError: pass if __name__ == "__main__": parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument( "num_iterations", type=int, default=1, nargs="?", help="Number of iterations to run the benchmark", ) parser.add_argument("--jit", action="store_true", help="Run in JIT mode") args = parser.parse_args() protocol = pickle.HIGHEST_PROTOCOL warmup(pickle, protocol) if args.jit: jit(pickle) for _ in range(args.num_iterations): bench_pickle(DEFAULT_LOOPS_PICKLE, pickle, protocol) bench_unpickle(DEFAULT_LOOPS_PICKLE, pickle, protocol) bench_pickle_list(DEFAULT_LOOPS_PICKLE_LIST, pickle, protocol) bench_unpickle_list(DEFAULT_LOOPS_PICKLE_LIST, pickle, protocol) bench_pickle_dict(DEFAULT_LOOPS_PICKLE_DICT, pickle, protocol)