Benchmarking RDF against MDAnalysis

The algorithms in freud are highly efficient and rely on parallelized C++ code. Below, we show a benchmark of freud.density.RDF and MDAnalysis.analysis.rdf.InterRDF. This benchmark was run on an Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz.

[1]:
import multiprocessing as mp
import timeit

import freud
import gsd
import matplotlib.pyplot as plt
import MDAnalysis
import MDAnalysis.analysis.rdf
import numpy as np
from tqdm import tqdm
[2]:
trajectory_filename = "data/rdf_benchmark.gsd"
r_max = 5
r_min = 0.1
nbins = 75
[3]:
trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
topology = MDAnalysis.core.topology.Topology(n_atoms=trajectory[0].n_atoms)
u = MDAnalysis.Universe(topology, trajectory_filename, dt=1.0)

rdf = MDAnalysis.analysis.rdf.InterRDF(
    g1=u.atoms, g2=u.atoms, nbins=nbins, range=(r_min, r_max)
).run()
[4]:
plt.plot(rdf.results.bins, rdf.results.rdf)
plt.show()
../../../_images/gettingstarted_examples_examples_Benchmarking_RDF_against_MDAnalysis_4_0.png
[5]:
freud_rdf = freud.density.RDF(bins=nbins, r_max=r_max, r_min=r_min)
for frame in trajectory:
    freud_rdf.compute(system=frame, reset=False)
freud_rdf.plot();
../../../_images/gettingstarted_examples_examples_Benchmarking_RDF_against_MDAnalysis_5_0.png

Timing Functions

[6]:
def time_statement(stmt, repeat=3, number=1, **kwargs):
    timer = timeit.Timer(stmt=stmt, globals=kwargs)
    times = timer.repeat(repeat, number)
    return np.mean(times), np.std(times)
[7]:
def time_mdanalysis_rdf(trajectory_filename, r_max, r_min, nbins):
    trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
    frame = trajectory[0]
    topology = MDAnalysis.core.topology.Topology(n_atoms=frame.n_atoms)
    u = MDAnalysis.Universe(topology, trajectory_filename, dt=1.0)
    code = """rdf = MDAnalysis.analysis.rdf.InterRDF(g1=u.atoms, g2=u.atoms, nbins=nbins, range=(r_min, r_max)).run()"""
    return time_statement(
        code, MDAnalysis=MDAnalysis, u=u, r_max=r_max, r_min=r_min, nbins=nbins
    )
[8]:
def time_freud_rdf(trajectory_filename, r_max, r_min, nbins):
    trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
    code = """
rdf = freud.density.RDF(bins=nbins, r_max=r_max, r_min=r_min)
for frame in trajectory:
    rdf.compute(system=frame, reset=False)"""
    return time_statement(
        code, freud=freud, trajectory=trajectory, r_max=r_max, r_min=r_min, nbins=nbins
    )
[9]:
# Test timing functions
params = dict(
    trajectory_filename=trajectory_filename, r_max=r_max, r_min=r_min, nbins=nbins
)


def system_size(trajectory_filename, **kwargs):
    with gsd.hoomd.open(params["trajectory_filename"], "r") as trajectory:
        return {
            "frames": len(trajectory),
            "particles": len(trajectory[0].particles.position),
        }


print(system_size(**params))
mdanalysis_rdf_runtime = time_mdanalysis_rdf(**params)
print("MDAnalysis:", mdanalysis_rdf_runtime)
freud_rdf_runtime = time_freud_rdf(**params)
print("freud:", freud_rdf_runtime)
{'frames': 5, 'particles': 15625}
MDAnalysis: (33.57325088791549, 0.3131925112569526)
freud: (1.2264695509026449, 0.022346588736873306)

Perform Measurements

[10]:
def measure_runtime_scaling_r_max(r_maxes, **params):
    result_times = []
    for r_max in tqdm(r_maxes):
        params.update(dict(r_max=r_max))
        freud.parallel.set_num_threads(1)
        freud_single = time_freud_rdf(**params)
        freud.parallel.set_num_threads(0)
        result_times.append(
            (time_mdanalysis_rdf(**params), freud_single, time_freud_rdf(**params))
        )
    return np.asarray(result_times)
[11]:
def plot_result_times(result_times, r_maxes, frames, particles):
    plt.figure(figsize=(6, 4), dpi=200)
    plt.errorbar(
        r_maxes,
        result_times[:, 0, 0],
        result_times[:, 0, 1],
        label=f"MDAnalysis v{MDAnalysis.__version__} analysis.rdf.InterRDF",
    )
    plt.errorbar(
        r_maxes,
        result_times[:, 1, 0],
        result_times[:, 1, 1],
        label=f"freud v{freud.__version__} density.RDF, 1 thread",
    )
    plt.errorbar(
        r_maxes,
        result_times[:, 2, 0],
        result_times[:, 2, 1],
        label=f"freud v{freud.__version__} density.RDF, {mp.cpu_count()} threads",
    )
    plt.title(rf"RDF for {frames} frames, {particles} particles")
    plt.xlabel(r"RDF $r_{{max}}$")
    plt.ylabel(r"Average Runtime (s)")
    plt.yscale("log")
    plt.legend()
    plt.show()
[12]:
r_maxes = [0.2, 0.3, 0.5, 1, 2, 3]
[13]:
result_times = measure_runtime_scaling_r_max(r_maxes, **params)
plot_result_times(result_times, r_maxes, **system_size(params["trajectory_filename"]))
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:44<00:00,  7.34s/it]
../../../_images/gettingstarted_examples_examples_Benchmarking_RDF_against_MDAnalysis_15_1.png
[14]:
print(
    f"Speedup, parallel freud / serial freud: {np.average(result_times[:, 1, 0] / result_times[:, 2, 0]):.3f}x"
)
print(
    f"Speedup, parallel freud / MDAnalysis: {np.average(result_times[:, 0, 0] / result_times[:, 2, 0]):.3f}x"
)
print(
    f"Speedup, serial freud / MDAnalysis: {np.average(result_times[:, 0, 0] / result_times[:, 1, 0]):.3f}x"
)
Speedup, parallel freud / serial freud: 5.944x
Speedup, parallel freud / MDAnalysis: 17.440x
Speedup, serial freud / MDAnalysis: 3.112x