Benchmarking RDF against MDAnalysis

The algorithms in freud are highly efficient and rely on parallelized C++ code. Below, we show a benchmark of freud.density.RDF against MDAnalysis.analysis.rdf. This benchmark was run on an Intel(R) Core(TM) i3-8100B CPU @ 3.60GHz.

[1]:
import freud
import gsd
import MDAnalysis
import MDAnalysis.analysis.rdf
import multiprocessing as mp
import numpy as np
import matplotlib.pyplot as plt
import timeit
from tqdm import tqdm
[2]:
trajectory_filename = 'data/rdf_benchmark.gsd'
r_max = 5
r_min = 0.1
nbins = 75
[3]:
trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
topology = MDAnalysis.core.topology.Topology(n_atoms=trajectory[0].n_atoms)
u = MDAnalysis.as_Universe(topology, trajectory_filename)

rdf = MDAnalysis.analysis.rdf.InterRDF(g1=u.atoms, g2=u.atoms,
                                       nbins=nbins,
                                       range=(r_min, r_max)).run()
[4]:
plt.plot(rdf.bins, rdf.rdf)
plt.show()
../../../_images/gettingstarted_examples_examples_Benchmarking_RDF_against_MDAnalysis_4_0.png
[5]:
freud_rdf = freud.density.RDF(bins=nbins, r_max=r_max, r_min=r_min)
for frame in trajectory:
    freud_rdf.compute(system=frame, reset=False)
freud_rdf
[5]:
../../../_images/gettingstarted_examples_examples_Benchmarking_RDF_against_MDAnalysis_5_0.png

Timing Functions

[6]:
def time_statement(stmt, repeat=3, number=1, **kwargs):
    timer = timeit.Timer(stmt=stmt, globals=kwargs)
    times = timer.repeat(repeat, number)
    return np.mean(times), np.std(times)
[7]:
def time_mdanalysis_rdf(trajectory_filename, r_max, r_min, nbins):
    trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
    frame = trajectory[0]
    topology = MDAnalysis.core.topology.Topology(n_atoms=frame.n_atoms)
    u = MDAnalysis.as_Universe(topology, trajectory_filename)
    code = """rdf = MDAnalysis.analysis.rdf.InterRDF(g1=u.atoms, g2=u.atoms, nbins=nbins, range=(r_min, r_max)).run()"""
    return time_statement(code, MDAnalysis=MDAnalysis, u=u, r_max=r_max, r_min=r_min, nbins=nbins)
[8]:
def time_freud_rdf(trajectory_filename, r_max, r_min, nbins):
    trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
    code = """
rdf = freud.density.RDF(bins=nbins, r_max=r_max, r_min=r_min)
for frame in trajectory:
    rdf.compute(system=frame, reset=False)"""
    return time_statement(code, freud=freud, trajectory=trajectory, r_max=r_max, r_min=r_min, nbins=nbins)
[9]:
# Test timing functions
params = dict(
    trajectory_filename=trajectory_filename,
    r_max=r_max,
    r_min=r_min,
    nbins=nbins)

def system_size(trajectory_filename, **kwargs):
    with gsd.hoomd.open(params['trajectory_filename'], 'rb') as trajectory:
        return {'frames': len(trajectory),
                'particles': len(trajectory[0].particles.position)}

print(system_size(**params))
mdanalysis_rdf_runtime = time_mdanalysis_rdf(**params)
print('MDAnalysis:', mdanalysis_rdf_runtime)
freud_rdf_runtime = time_freud_rdf(**params)
print('freud:', freud_rdf_runtime)
{'frames': 5, 'particles': 15625}
MDAnalysis: (18.00504128, 0.054983033593944214)
freud: (2.8556541516666747, 0.05481114115556424)

Perform Measurements

[10]:
def measure_runtime_scaling_r_max(r_maxes, **params):
    result_times = []
    for r_max in tqdm(r_maxes):
        params.update(dict(r_max=r_max))
        freud.parallel.set_num_threads(1)
        freud_single = time_freud_rdf(**params)
        freud.parallel.set_num_threads(0)
        result_times.append((time_mdanalysis_rdf(**params), freud_single, time_freud_rdf(**params)))
    return np.asarray(result_times)
[11]:
def plot_result_times(result_times, r_maxes, frames, particles):
    plt.figure(figsize=(6, 4), dpi=200)
    plt.errorbar(r_maxes, result_times[:, 0, 0], result_times[:, 0, 1],
                 label="MDAnalysis v{} analysis.rdf.InterRDF".format(MDAnalysis.__version__))
    plt.errorbar(r_maxes, result_times[:, 1, 0], result_times[:, 1, 1],
                 label="freud v{} density.RDF, 1 thread".format(freud.__version__))
    plt.errorbar(r_maxes, result_times[:, 2, 0], result_times[:, 2, 1],
                 label="freud v{} density.RDF, {} threads".format(freud.__version__, mp.cpu_count()))
    plt.title(r'RDF for {} frames, {} particles'.format(frames, particles))
    plt.xlabel(r'RDF $r_{{max}}$')
    plt.ylabel(r'Average Runtime (s)')
    plt.yscale('log')
    plt.legend()
    plt.show()
[12]:
r_maxes = [0.2, 0.3, 0.5, 1, 2, 3]
[13]:
result_times = measure_runtime_scaling_r_max(r_maxes, **params)
plot_result_times(result_times, r_maxes, **system_size(params['trajectory_filename']))
100%|██████████| 6/6 [00:31<00:00,  5.28s/it]
../../../_images/gettingstarted_examples_examples_Benchmarking_RDF_against_MDAnalysis_15_1.png
[14]:
print('Speedup, parallel freud / serial freud: {:.3f}x'.format(np.average(result_times[:, 1, 0] / result_times[:, 2, 0])))
print('Speedup, parallel freud / MDAnalysis: {:.3f}x'.format(np.average(result_times[:, 0, 0] / result_times[:, 2, 0])))
print('Speedup, serial freud / MDAnalysis: {:.3f}x'.format(np.average(result_times[:, 0, 0] / result_times[:, 1, 0])))
Speedup, parallel freud / serial freud: 2.900x
Speedup, parallel freud / MDAnalysis: 7.182x
Speedup, serial freud / MDAnalysis: 2.619x