String building benchmarks code

Below is the full code that can be used to run the three micro-benchmarks described in my post about string concatenation:

# -*- coding: utf-8 -*-
import csv
import io
import itertools
import functools
import math
import sys
import timeit

# The benchmark writes native ``str`` objects. On Python 2 those are byte
# strings, so the in-memory buffer must be the bytes flavour there; on
# Python 3 the text flavour is used.
StringIO = io.BytesIO if sys.version_info < (3,) else io.StringIO


def string_join(n_components):
    """Allocate a list of strings and concatenate them with join.

    This is one of the three snippets timed by ``main``. The explicit
    append loop is itself the thing being measured, so it should not be
    "simplified" into another construct without re-running the benchmark.

    ``n_components`` is the number of pieces to build; piece ``i`` is the
    decimal text of ``i``. Returns the pieces concatenated in order, e.g.
    ``"012"`` for ``n_components=3``.
    """
    # Phase 1: accumulate every piece in a list.
    str_components = []
    for i in range(n_components):
        str_components.append("%d" % i)

    # Phase 2: build the final string with a single join call.
    return "".join(str_components)


def string_buffer(n_components):
    """Build a string by incrementally writing to a StringIO.

    This is one of the three snippets timed by ``main``; the write loop and
    the seek/read pair are what is being measured.

    ``StringIO`` is the module-level alias chosen from the interpreter
    version (``io.BytesIO`` on Python 2, ``io.StringIO`` otherwise).

    ``n_components`` is the number of pieces to write; piece ``i`` is the
    decimal text of ``i``. Returns everything that was written, in order.
    """
    buffer = StringIO()

    for i in range(n_components):
        buffer.write("%d" % i)

    # Rewind to the start so that read() returns the whole contents rather
    # than nothing (the position is at the end after the writes).
    buffer.seek(0)
    return buffer.read()


def string_concat(n_components):
    """Incrementally build a string with +=

    This is one of the three snippets timed by ``main``; the repeated
    ``+=`` on a string is deliberately kept, since it is the technique
    under test.

    ``n_components`` is the number of pieces to append; piece ``i`` is the
    decimal text of ``i``. Returns the pieces concatenated in order.
    """
    str_out = ""
    for i in range(n_components):
        # Each iteration rebinds str_out to the extended string.
        str_out += "%d" % i
    return str_out


if sys.version_info >= (3, 4):
    from statistics import mean, stdev
else:
    # The statistics module is not available before Python 3.4, so older
    # interpreters get simplified stand-ins for the two functions used by
    # this script.
    def mean(values):
        """Return the arithmetic mean of ``values``."""
        return sum(values) / len(values)

    def stdev(values, xbar=None):
        """Return the sample standard deviation of ``values``.

        ``xbar`` may carry a precomputed mean; when omitted it is derived
        from ``values``. The variance uses the ``N - 1`` denominator.
        """
        if xbar is None:
            xbar = mean(values)

        squared_deviations = sum((x - xbar) ** 2 for x in values)
        return math.sqrt(squared_deviations / (len(values) - 1))


def _format_truncated(value, truncation):
    """Round ``value`` to ``truncation`` decimal places and format it.

    A negative ``truncation`` rounds to the left of the decimal point
    (``-1`` rounds to the nearest ten); the result is then printed without
    any decimals.

        >>> _format_truncated(13.14, 1)
        '13.1'
        >>> _format_truncated(1234.5, -2)
        '1200'
    """
    truncated_value = round(value, truncation)

    # A format spec cannot carry a negative precision, so clamp it to zero.
    fmt_str = "0.%df" % max(truncation, 0)  # "0.1f" -> "13.1", "0.0f" -> "13"

    return format(truncated_value, fmt_str)


def _format_mean_std(mean_val, std_val):
    """Write a string with the appropriate number of significant figures.

    This will round the mean so that the lowest significant digit is the
    highest significant digit of the standard deviation, e.g.

        >>> _format_mean_std(1.24, 0.3)
        '1.2 (± 0.3)'
        >>> _format_mean_std(5.7883, 11.9344)
        '6 (± 10)'
        >>> _format_mean_std(5.7883, 0.02455)
        '5.79 (± 0.02)'

    Standard deviation is only reported to 1 significant figure.

    Two degenerate inputs are handled instead of raising a math domain
    error: a standard deviation of exactly zero gives ``"<mean> (± 0)"``
    with the mean in general ("g") format, and a mean of exactly zero is
    simply rounded to the precision dictated by the standard deviation.
    """
    if std_val == 0:
        # log10(0) is undefined and there is no spread to derive a
        # precision from, so report the mean as-is.
        return "%s (± 0)" % format(mean_val, "g")

    # Assume we have 1.24 ± 0.3, we want to round to 1.2, since any sig figs
    # an order of magnitude smaller than the standard deviation are suspect.
    # log10 is used rather than log(x, 10) because the latter is inexact at
    # powers of ten (log(1000, 10) is slightly below 3).
    std_truncation = -int(math.floor(math.log10(std_val)))
    mean_truncation = std_truncation

    # Don't truncate too far - if the stdev is >= the mean, keep the mean's
    # own leading digit. A zero mean has no leading digit, so skip it.
    if mean_val != 0:
        mean_round_mag = -math.log10(abs(mean_val))
        if mean_round_mag > mean_truncation:
            # int() because math.ceil returns a float on Python 2.
            mean_truncation = int(math.ceil(mean_round_mag))

    mean_str = _format_truncated(mean_val, mean_truncation)
    std_str = _format_truncated(std_val, std_truncation)

    return "%s (± %s)" % (mean_str, std_str)


def _time_with_std(timer, number, k=5):
    """Time a snippet repeatedly and summarise the per-execution cost.

    ``timer.timeit(number=number)`` is called ``k`` times. Each total is
    scaled by ``1e6 / number`` to give microseconds per single execution of
    the snippet.

    Returns a ``(mean, std)`` tuple computed over the ``k`` samples using
    the module-level ``mean`` and ``stdev`` functions.
    """
    samples = [
        timer.timeit(number=number) * (1e6 / number)  # Convert to microseconds
        for _ in range(k)
    ]

    center = mean(samples)
    return center, stdev(samples, xbar=center)


def main(csv_mode):
    """Run the three string-building benchmarks and print the results.

    For each component count (10, 100, 1000, 10000) every benchmark
    function is timed 11 times, with the per-batch repetition count scaled
    as ``100000 // n`` (at least 1). Each cell holds the formatted mean and
    standard deviation in microseconds.

    When ``csv_mode`` is true the rows are written to stdout as CSV without
    a header; otherwise an aligned table with a header line is printed.

    The timeit setup imports the benchmark functions from ``__main__``, so
    this is expected to be called when the file is run as a script.
    """
    setup = "from __main__ import string_join, string_concat, string_buffer"
    benchmark_names = ("string_join", "string_buffer", "string_concat")

    rows = []
    for n in (10, 100, 1000, 10000):
        number = max(100000 // n, 1)

        cells = [n]
        for func_name in benchmark_names:
            timer = timeit.Timer("%s(%d)" % (func_name, n), setup=setup)
            time_mean, time_std = _time_with_std(timer, number=number, k=11)
            cells.append(_format_mean_std(time_mean, time_std))

        rows.append(tuple(cells))

    if csv_mode:
        csv.writer(sys.stdout).writerows(rows)
        return

    header = "{:^20} | {:^20} | {:^20} | {:^20}".format(
        "# components", "join_time (μs)", "buffer_time (μs)", "concat_time (μs)"
    )
    print(header)
    print("-" * len(header))
    for row in rows:
        print("{:<20} | {:^20} | {:^20} | {:^20}".format(*row))


if __name__ == "__main__":
    # A real argument parser would be overkill here: CSV output is enabled
    # only when the sole command-line argument is exactly "--csv".
    csv_mode = sys.argv[1:] == ["--csv"]
    main(csv_mode)

You can download the code here.