In-order-CPU och out-of-order-CPU – test i C++
När man läser om CPU:er brukar man få reda på om de är "out-of-order" eller "in-order", dvs. om de själva kan ändra ordningen på instruktionerna vid körning eller om det krävs att kompilatorn gör detta. Dagens PC-processorer är alla out-of-order, så det är svårt att hitta tydliga skillnader. Den sista med in-order var den ursprungliga Atom.
Mer spännande är det för ARM och Raspberry Pi. Jag har gjort några tester på Pi 3 med denna kod:
#include <ctime>
#include <iostream>
#include <math.h>
using namespace std;
// Hands back its argument untouched. main() times it next to the sum
// functions.
int identity(int x)
{
    return x;
}
// Integer-only benchmark body timed by main().
//
// Adds 0 + 1 + ... + 1999999 to num, then runs one fixed sequence of integer
// multiply / divide / modulo / add statements. None of those results is read
// back; only the accumulated total is returned. The statement order is
// presumably the point of the experiment, so it is kept exactly as written.
//
// num:     start value of the accumulation.
// returns: num plus the sum 0..1999999, wrapped modulo 2^32 where int is
//          32 bits wide.
int sum1(int num) {
    // Every local starts from a defined, non-zero value. The original read
    // most of them uninitialized (undefined behaviour) and divided by the
    // uninitialized z6, which could be zero.
    int a = 3, b = 1, c = 1, d = 1, e = 1, f = 1, g = 1, h = 1;
    int k = 1, l = 1, m = 1, n = 1, o = 1, p = 1, q = 1, r = 1;
    int s = 1, t = 1, u = 1, v = 1, w = 1, x = 1, y = 1, z = 1;
    int z1 = 1, z2 = 1, z3 = 1, z4 = 1, z5 = 1, z6 = 1;

    // The total exceeds INT_MAX, so accumulate in unsigned arithmetic, where
    // wrap-around is well defined (signed overflow is undefined behaviour).
    unsigned int total = (unsigned int) num;

    // Only the accumulation belongs to the loop: the original `for` had no
    // braces, so everything after this statement ran exactly once.
    // NOTE(review): if the arithmetic was meant to run per iteration, that is
    // a separate change.
    for (unsigned int i = 0; i < 2000000u; i++) {
        total += i;
    }

    t = r / a;
    s = o / a;
    r = a / a;
    p = 2 * a;
    o = b / a;
    h = b / a;
    g = h % a;
    f = b / a;
    e = c % a;
    d = c / a;
    b = 3 * a;
    c = a / a;
    p = k + a + d;
    q = p + b + m;
    n = k + a;
    m = 1 + b + l;
    l = 3 * a + k;
    k = a + b + 3;
    t = r / a;
    u = t / a;
    v = h / a;
    w = b / z6;
    x = c / a;
    y = c / a + p;
    z = 3 * a;
    z1 = z5 / a;
    z2 = k + a + d;
    z3 = p + b + m;
    z4 = k + a;
    z5 = 1 + b + l;
    a = a + b;

    // Converting back to int keeps the wrapped bit pattern on two's-complement
    // targets (implementation-defined before C++20).
    return (int) total;
}
// Mixed integer / floating-point benchmark body timed by main().
//
// Adds 0 + 1 + ... + 1999999 to num, then runs one fixed sequence of integer
// divisions and additions followed by float sqrt / reciprocal / conversion
// statements. None of those results is read back; only the accumulated total
// is returned. The statement order is kept exactly as written.
//
// num:     start value of the accumulation.
// returns: num plus the sum 0..1999999, wrapped modulo 2^32 where int is
//          32 bits wide.
int sum2(int num) {
    // Every local starts from a defined, non-zero value. The original read
    // them uninitialized (undefined behaviour), including the integer
    // divisors s1, p1, b1 and c1, any of which could be zero.
    int a1 = 3, b1 = 1, c1 = 1, d1 = 1, e1 = 1, f1 = 1;
    int g1 = 1, h1 = 1, k1 = 1, l1 = 1, m1 = 1, n1 = 1;
    int o1 = 1, p1 = 1, q1 = 1, r1 = 1, s1 = 1, t1 = 1;
    float x1 = 1.0f, y1 = 1.0f, z1 = 1.0f, u1 = 1.0f, w1 = 1.0f;
    float x3 = 1.0f, y3 = 1.0f, z3 = 1.0f, u3 = 1.0f, w3 = 1.0f;
    float x5 = 1.0f, y5 = 1.0f, z5 = 1.0f, u5 = 1.0f, w5 = 1.0f;

    // The total exceeds INT_MAX, so accumulate in unsigned arithmetic, where
    // wrap-around is well defined (signed overflow is undefined behaviour).
    unsigned int total = (unsigned int) num;

    // Only the accumulation belongs to the loop: the original `for` had no
    // braces, so everything after this statement ran exactly once.
    for (unsigned int i = 0; i < 2000000u; i++) {
        total += i;
    }

    t1 = r1 / s1;
    s1 = o1 / p1;
    r1 = a1 / b1;
    p1 = 2 * a1;
    o1 = a1 / b1;
    h1 = b1 / a1;
    g1 = h1 / a1;
    f1 = b1 / c1;
    e1 = c1 / a1;
    d1 = c1 / b1;
    b1 = 3 * a1;
    c1 = a1 / b1;   // 3 / 9 == 0 from here on
    p1 = k1 + a1 + d1;
    q1 = p1 + b1 + m1;
    n1 = k1 + a1;
    m1 = 1 + b1 + l1;
    l1 = 3 * a1 + k1;
    k1 = a1 + b1 + 3;

    x1 = sqrt (w1);
    y1 = 1 / z1;
    z1 = (float) c1;
    u1 = 1 / x1;
    w1 = (float) h1;
    x3 = sqrt (w1);
    // NOTE(review): z1 is 0.0f here (converted from c1 == 0), so this is a
    // floating-point division by zero; it yields +inf on IEEE 754 targets.
    y3 = 1 / z1;
    z3 = (float) c1;
    u3 = 1 / x1;
    w3 = (float) h1;
    x5 = sqrt (w1);
    // NOTE(review): same situation, z3 is 0.0f.
    y5 = 1 / z3;
    z5 = (float) m1;
    u5 = 1 / x1;
    w5 = (float) e1;
    a1 = a1 + b1;

    // Converting back to int keeps the wrapped bit pattern on two's-complement
    // targets (implementation-defined before C++20).
    return (int) total;
}
// Mixed integer / floating-point benchmark body timed by main().
//
// Adds 0 + 1 + ... + 1999999 to num, then runs one fixed sequence of integer
// divisions and additions followed by float sqrt / reciprocal / conversion
// statements. None of those results is read back; only the accumulated total
// is returned. The statement order is kept exactly as written.
//
// num:     start value of the accumulation.
// returns: num plus the sum 0..1999999, wrapped modulo 2^32 where int is
//          32 bits wide.
int sum3(int num) {
    // j1 starts at 1 instead of the original 3. With 3, the chain
    // j2 = j1/j1 (== 1), j3 = j2/j1 (== 0) made the following `j1 / j3` an
    // integer division by zero on every run. With 1 every divisor below is
    // non-zero.
    // All other locals also get a defined, non-zero start value; the original
    // read j17, j16, j15, j3 and j4 before ever assigning them.
    int j1 = 1, j2 = 1, j3 = 1, j4 = 1, j5 = 1, j6 = 1;
    int j7 = 1, j8 = 1, j9 = 1, j10 = 1, j11 = 1, j12 = 1;
    int j13 = 1, j14 = 1, j15 = 1, j16 = 1, j17 = 1, j18 = 1;
    float s1 = 1.0f, s2 = 1.0f, s3 = 1.0f, s4 = 1.0f, s5 = 1.0f;
    float s6 = 1.0f, s7 = 1.0f, s8 = 1.0f, s9 = 1.0f, s10 = 1.0f;
    float s11 = 1.0f, s12 = 1.0f, s13 = 1.0f, s14 = 1.0f, s15 = 1.0f;

    // The total exceeds INT_MAX, so accumulate in unsigned arithmetic, where
    // wrap-around is well defined (signed overflow is undefined behaviour).
    unsigned int total = (unsigned int) num;

    // Only the accumulation belongs to the loop: the original `for` had no
    // braces, so everything after this statement ran exactly once.
    for (unsigned int i = 0; i < 2000000u; i++) {
        total += i;
    }

    j18 = j17 / j1;
    j17 = j16 / j3;
    j16 = j15 / j4;
    j15 = j1 / j3;
    j2 = j1 / j1;
    j3 = j2 / j1;
    j4 = j2 / j2;
    j5 = j1 / j2;
    j6 = j1 / j3;
    j7 = 3 * j1;
    j8 = j1 / j7;
    j9 = j1 + j2 + j3;
    j10 = j1 + j9 + j5;
    j11 = j1 + j10 + j3;
    j12 = j1 + j2;
    j13 = 1 + j8 + j4;
    j14 = 3 * j1 + j11;

    s1 = sqrt (10);
    s2 = 1 / s1;
    s3 = (float) j7;
    s4 = 1 / s3;
    s5 = (float) j1;
    s6 = sqrt (10);
    s7 = 1 / s1;
    s8 = (float) j7;
    s9 = 1 / s3;
    s10 = (float) j3;
    s11 = sqrt (10);
    s12 = 1 / s1;
    s13 = (float) j7;
    s14 = 1 / s3;
    s15 = (float) j8;
    j1 = j1 + j2;

    // Converting back to int keeps the wrapped bit pattern on two's-complement
    // targets (implementation-defined before C++20).
    return (int) total;
}
// Mixed integer / floating-point benchmark body timed by main(). It uses the
// same variable set as sum3 with the statements in a different order.
//
// Adds 0 + 1 + ... + 1999999 to num, then runs one fixed sequence of
// interleaved integer and float statements. None of those results is read
// back; only the accumulated total is returned. The statement order is kept
// exactly as written.
//
// num:     start value of the accumulation.
// returns: num plus the sum 0..1999999, wrapped modulo 2^32 where int is
//          32 bits wide.
int sum4(int num) {
    // Every local starts from a defined value; the original read j2, j8, j5,
    // j15..j17, s1 and s3 before assigning them (undefined behaviour).
    // The integers start at 9 rather than 1 on purpose: j3 = j2 / j1 must be
    // non-zero because j3 is used as a divisor right afterwards (9 / 3 == 3).
    int j1 = 3, j2 = 9, j3 = 9, j4 = 9, j5 = 9, j6 = 9;
    int j7 = 9, j8 = 9, j9 = 9, j10 = 9, j11 = 9, j12 = 9;
    int j13 = 9, j14 = 9, j15 = 9, j16 = 9, j17 = 9, j18 = 9;
    float s1 = 1.0f, s2 = 1.0f, s3 = 1.0f, s4 = 1.0f, s5 = 1.0f;
    float s6 = 1.0f, s7 = 1.0f, s8 = 1.0f, s9 = 1.0f, s10 = 1.0f;
    float s11 = 1.0f, s12 = 1.0f, s13 = 1.0f, s14 = 1.0f, s15 = 1.0f;

    // The total exceeds INT_MAX, so accumulate in unsigned arithmetic, where
    // wrap-around is well defined (signed overflow is undefined behaviour).
    unsigned int total = (unsigned int) num;

    // Only the accumulation belongs to the loop: the original `for` had no
    // braces, so everything after this statement ran exactly once.
    for (unsigned int i = 0; i < 2000000u; i++) {
        total += i;
    }

    j7 = 3 * j1;
    j3 = j2 / j1;
    j4 = j2 / j2;
    j6 = j1 / j3;
    j13 = 1 + j8 + j4;
    j8 = j1 / j7;
    j9 = j1 + j2 + j3;
    j10 = j1 + j9 + j5;
    j11 = j1 + j10 + j3;
    j12 = j1 + j2;
    j5 = j1 / j2;
    j14 = 3 * j1;
    j2 = j1 / j1;
    s11 = sqrt (5 + j2);
    s9 = 1 / s3;
    s2 = 1 / s1;
    s3 = (float) j7;
    s4 = 1 / s3;
    j18 = j17 / j1;
    j17 = j16 / j3;
    j16 = j15 / j4;
    j15 = j1 / j3;
    s6 = sqrt (10);
    s7 = 1 / s1;
    s8 = (float) j7;
    s5 = (float) j1;
    s10 = (float) j3;
    s1 = sqrt (10 + j2);
    s12 = 1 / s1;
    s13 = (float) j7;
    s14 = 1 / s3;
    s15 = (float) j8;
    j1 = j1 + j2;

    // Converting back to int keeps the wrapped bit pattern on two's-complement
    // targets (implementation-defined before C++20).
    return (int) total;
}
// Floating-point-only benchmark body.
//
// Adds 0 + 1 + ... + 1999999 to num, then runs one fixed sequence of float
// sqrt / division / multiplication / addition statements. None of those
// results is read back; only the accumulated total is returned. The statement
// order is kept exactly as written.
//
// num:     start value of the accumulation.
// returns: num plus the sum 0..1999999, wrapped modulo 2^32 where int is
//          32 bits wide.
int sum5(int num) {
    // Every float starts at 1.0f. The original read s1, s2, s3 and s15 before
    // assigning them (undefined behaviour), so the divisions worked on
    // whatever happened to be on the stack.
    float s1 = 1.0f, s2 = 1.0f, s3 = 1.0f, s4 = 1.0f, s5 = 1.0f;
    float s6 = 1.0f, s7 = 1.0f, s8 = 1.0f, s9 = 1.0f, s10 = 1.0f;
    float s11 = 1.0f, s12 = 1.0f, s13 = 1.0f, s14 = 1.0f, s15 = 1.0f;
    float s16 = 1.0f, s17 = 1.0f, s18 = 1.0f, s19 = 1.0f, s20 = 1.0f;
    float s21 = 1.0f, s22 = 1.0f, s23 = 1.0f, s24 = 1.0f, s25 = 1.0f;
    float s26 = 1.0f, s27 = 1.0f, s28 = 1.0f, s29 = 1.0f, s30 = 1.0f;

    // The total exceeds INT_MAX, so accumulate in unsigned arithmetic, where
    // wrap-around is well defined (signed overflow is undefined behaviour).
    unsigned int total = (unsigned int) num;

    // Only the accumulation belongs to the loop: the original `for` had no
    // braces, so everything after this statement ran exactly once.
    for (unsigned int i = 0; i < 2000000u; i++) {
        total += i;
    }

    s30 = sqrt (5);
    s29 = 1 / s3;
    s28 = 1 / s1;
    s27 = sqrt(s30);
    s26 = sqrt (10);
    s25 = sqrt (5);
    s24 = 1 / s3;
    s23 = 1 / s1;
    s22 = 1 / s30;
    s21 = 1 / s3;
    s20 = sqrt(10);
    s19 = s20 / s30;
    s17 = sqrt(s20);
    s16 = s15 + s23;
    s7 = 1 / s1;
    s8 = s7 * s7;
    s5 = s1 + s1;
    s10 = s3 + s1;
    s1 = sqrt(10);
    s12 = 1 / s1;
    s13 = s2 / s3;
    s14 = 1 / s3;
    s15 = 5;
    s2 = s1 / s5;
    s3 = s10 / s17;
    s4 = s5 / s2;

    // Converting back to int keeps the wrapped bit pattern on two's-complement
    // targets (implementation-defined before C++20).
    return (int) total;
}
// Calls action(arg) once and reports how much clock() time the call took.
//
// action:  function to measure; its return value is discarded.
// arg:     argument forwarded to action.
// returns: elapsed clock() time in milliseconds.
double time_it(int (*action)(int), int arg)
{
    const clock_t began = clock();
    action(arg);
    const clock_t ended = clock();

    // Scale the tick difference to milliseconds before dividing by the tick rate.
    const double scaled_ticks = (double) 1000 * (ended - began);
    return scaled_ticks / CLOCKS_PER_SEC;
}
int main() {
cout << "Identity(100) takes " << time_it(identity, 100) << " mseconds." << endl;
cout << "Sum1(100) takes " << time_it(sum1, 100) << " mseconds." << endl;
cout << "Sum2(100) takes " << time_it(sum2, 100) << " mseconds." << endl;
cout << "Sum3(100) takes " << time_it(sum3, 100) << " mseconds." << endl;
cout << "Sum4(100) takes " << time_it(sum4, 100) << " mseconds." << endl;
cout << "Sum5(100) takes " << time_it(sum4, 100) << " mseconds." << endl;
return 0;
}
Med Cortex-A53 har vi in-order, så utan optimering fungerar enbart Sum1.
Med -O1, -O2 eller -O3 går det bättre.
RPi1, RPi2 och RPi3 har in-order medan RPi4 har "deeply-out-of-order".
Tinker Board gen 1 har A-17 som är out-of-order men inte "deeply".
Tinker Board gen 2 har A-53 dvs in-order.
Banana Pi har in-order.
Någon som vill testa denna kod?