J
Jerry Coffin
[ ... ]
Well no, he didn't really even do that. The shortcomings in his
testing mean that all he did was raise the possibility that it
_could_ be the case. But despite having a multitude of problems
pointed out very explicitly and clearly, he has not posted results
from a test that makes any attempt at taking those problems into
account. What we're left with is a claim but no data that really
supports it.
Just for fun, I'm going to post an even more comprehensive test
program. This one uses container sizes from 10 to 100 (in steps of
10), but weights the number of iterations toward the smaller
containers -- the number of iterations for any given size is
'iterations/size'. This emphasizes smaller sizes, but takes larger
sizes into account as well. This one does the test for list, vector
and deque, and uses all three strategies I previously outlined (clear
(), delete/new, accumulate inside the loop) for dealing with creating
an empty container for each iteration of the outer loop.
#include <vector>
#include <list>
#include <time.h>
#include <iostream>
#include <numeric>
#include <deque>
#include <string>
const size_t iterations = 3000000;
template <class T>
struct test1 {
clock_t operator()(size_t size) {
T coll;
clock_t start = clock();
for (int j=0; j<iterations/size; j++) {
coll.clear();
for (int i=0; i<size; i++)
coll.push_back(i);
}
clock_t total_time = clock()-start;
int total = std::accumulate(coll.begin(), coll.end(), 0);
return total_time;
}
};
template <class T>
struct test2 {
clock_t operator()(size_t size) {
T *coll = NULL;
clock_t start = clock();
for (size_t j=0; j<iterations/size; j++) {
delete coll;
coll = new T;
for (size_t i=0; i<size; i++)
coll->push_back(i);
}
clock_t total_time = clock() - start;
int total=std::accumulate(coll->begin(), coll->end(), 0);
delete coll;
return total_time;
}
};
template <class T>
struct test3 {
clock_t operator()(size_t size) {
clock_t start = clock();
for (size_t j=0; j<iterations/size; j++) {
T coll;
for (size_t i=0; i<size; i++)
coll.push_back(i);
int total = std::accumulate(coll.begin(), coll.end(), 0);
}
return clock()-start;
}
};
void show_time(std::string const &caption, clock_t t) {
std::cout << caption << "\t" << t << std::endl;
}
int main() {
clock_t times[3] = {0};
char const *labels[3] = {"list:", "vector:", "deque:"};
const size_t start = 10, stop=100, step=10;
for (size_t i=start; i<stop; i+=step) {
times[0] += test1<std::list<int> >()(i);
times[1] += test1<std::vector<int> >()(i);
times[2] += test1<std::deque<int> >()(i);
}
std::cout << "Times using clear():\n";
for (int i=0; i<3; i++)
show_time(labels, times);
std::fill_n(times, 3, clock_t(0));
for (size_t i=start; i<stop; i+=step) {
times[0] += test2<std::list<int> >()(i);
times[1] += test2<std::vector<int> >()(i);
times[2] += test2<std::deque<int> >()(i);
}
std::cout << "Times using delete/new:\n";
for (int i=0; i<3; i++)
show_time(labels, times);
std::fill_n(times, 3, clock_t(0));
for (size_t i=start; i<stop; i+=step) {
times[0] += test3<std::list<int> >()(i);
times[1] += test3<std::vector<int> >()(i);
times[2] += test3<std::deque<int> >()(i);
}
std::cout << "Times using accumulate inside loop:\n";
for (int i=0; i<3; i++)
show_time(labels, times);
return (0);
}
Here are results from VC++ 9:
Times using clear():
list: 4104
vector: 123
deque: 1420
Times using delete/new:
list: 4572
vector: 1981
deque: 1730
Times using accumulate inside loop:
list: 4462
vector: 1935
deque: 1637
Just for fun, I also compiled it with Microsoft's 64-bit compiler,
and got these results:
Times using clear():
list: 2183
vector: 126
deque: 889
Times using delete/new:
list: 2371
vector: 1405
deque: 1060
Times using accumulate inside loop:
list: 2355
vector: 1280
deque: 1076
In both cases, the following compiler flags were used:
/O2b2 /GL /EHsc
/O2b2 turns on full optimization with automatic inlining
/GL turns on "whole-program" optimization
/EHsc enable exception handling
Conclusions:
1) vector gets the single fastest result (by a wide margin).
2) deque is the most dependably fast (wins the most often).
3) list does a lot better with the 64-bit compiler.
3a) but it's still consistently the slowest, by ~1.7:1 at best.
In the OP's use case, Sam took it mean a small number of ints, ~10.
Sam did correctly show that inserting ~10 ints into a std::list is
faster than std::vector for whatever platform, compiler, etc., he was
using.
Well no, he didn't really even do that. The shortcomings in his
testing mean that all he did was raise the possibility that it
_could_ be the case. But despite having a multitude of problems
pointed out very explicitly and clearly, he has not posted results
from a test that makes any attempt at taking those problems into
account. What we're left with is a claim but no data that really
supports it.
Just for fun, I'm going to post an even more comprehensive test
program. This one uses container sizes from 10 to 100 (in steps of
10), but weights the number of iterations toward the smaller
containers -- the number of iterations for any given size is
'iterations/size'. This emphasizes smaller sizes, but takes larger
sizes into account as well. This one does the test for list, vector
and deque, and uses all three strategies I previously outlined (clear
(), delete/new, accumulate inside the loop) for dealing with creating
an empty container for each iteration of the outer loop.
#include <vector>
#include <list>
#include <time.h>
#include <iostream>
#include <numeric>
#include <deque>
#include <string>
const size_t iterations = 3000000;
template <class T>
struct test1 {
clock_t operator()(size_t size) {
T coll;
clock_t start = clock();
for (int j=0; j<iterations/size; j++) {
coll.clear();
for (int i=0; i<size; i++)
coll.push_back(i);
}
clock_t total_time = clock()-start;
int total = std::accumulate(coll.begin(), coll.end(), 0);
return total_time;
}
};
template <class T>
struct test2 {
clock_t operator()(size_t size) {
T *coll = NULL;
clock_t start = clock();
for (size_t j=0; j<iterations/size; j++) {
delete coll;
coll = new T;
for (size_t i=0; i<size; i++)
coll->push_back(i);
}
clock_t total_time = clock() - start;
int total=std::accumulate(coll->begin(), coll->end(), 0);
delete coll;
return total_time;
}
};
template <class T>
struct test3 {
clock_t operator()(size_t size) {
clock_t start = clock();
for (size_t j=0; j<iterations/size; j++) {
T coll;
for (size_t i=0; i<size; i++)
coll.push_back(i);
int total = std::accumulate(coll.begin(), coll.end(), 0);
}
return clock()-start;
}
};
void show_time(std::string const &caption, clock_t t) {
std::cout << caption << "\t" << t << std::endl;
}
int main() {
clock_t times[3] = {0};
char const *labels[3] = {"list:", "vector:", "deque:"};
const size_t start = 10, stop=100, step=10;
for (size_t i=start; i<stop; i+=step) {
times[0] += test1<std::list<int> >()(i);
times[1] += test1<std::vector<int> >()(i);
times[2] += test1<std::deque<int> >()(i);
}
std::cout << "Times using clear():\n";
for (int i=0; i<3; i++)
show_time(labels, times);
std::fill_n(times, 3, clock_t(0));
for (size_t i=start; i<stop; i+=step) {
times[0] += test2<std::list<int> >()(i);
times[1] += test2<std::vector<int> >()(i);
times[2] += test2<std::deque<int> >()(i);
}
std::cout << "Times using delete/new:\n";
for (int i=0; i<3; i++)
show_time(labels, times);
std::fill_n(times, 3, clock_t(0));
for (size_t i=start; i<stop; i+=step) {
times[0] += test3<std::list<int> >()(i);
times[1] += test3<std::vector<int> >()(i);
times[2] += test3<std::deque<int> >()(i);
}
std::cout << "Times using accumulate inside loop:\n";
for (int i=0; i<3; i++)
show_time(labels, times);
return (0);
}
Here are results from VC++ 9:
Times using clear():
list: 4104
vector: 123
deque: 1420
Times using delete/new:
list: 4572
vector: 1981
deque: 1730
Times using accumulate inside loop:
list: 4462
vector: 1935
deque: 1637
Just for fun, I also compiled it with Microsoft's 64-bit compiler,
and got these results:
Times using clear():
list: 2183
vector: 126
deque: 889
Times using delete/new:
list: 2371
vector: 1405
deque: 1060
Times using accumulate inside loop:
list: 2355
vector: 1280
deque: 1076
In both cases, the following compiler flags were used:
/O2b2 /GL /EHsc
/O2b2 turns on full optimization with automatic inlining
/GL turns on "whole-program" optimization
/EHsc enable exception handling
Conclusions:
1) vector gets the single fastest result (by a wide margin).
2) deque is the most dependably fast (wins the most often).
3) list does a lot better with the 64-bit compiler.
3a) but it's still consistently the slowest, by ~1.7:1 at best.