Skip to content

Commit 17b9db2

Browse files
committed
Merge pull request #483 from wernsaar/develop
added Steamroller as a cpu target
2 parents cbb3ab8 + 0dc559e commit 17b9db2

18 files changed

+261
-15
lines changed

Makefile.system

+7-1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ endif
6161
ifeq ($(TARGET), PILEDRIVER)
6262
GETARCH_FLAGS := -DFORCE_BARCELONA
6363
endif
64+
ifeq ($(TARGET), STEAMROLLER)
65+
GETARCH_FLAGS := -DFORCE_BARCELONA
66+
endif
6467
endif
6568

6669

@@ -85,6 +88,9 @@ endif
8588
ifeq ($(TARGET_CORE), PILEDRIVER)
8689
GETARCH_FLAGS := -DFORCE_BARCELONA
8790
endif
91+
ifeq ($(TARGET_CORE), STEAMROLLER)
92+
GETARCH_FLAGS := -DFORCE_BARCELONA
93+
endif
8894
endif
8995

9096

@@ -392,7 +398,7 @@ endif
392398
ifeq ($(ARCH), x86_64)
393399
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
394400
ifneq ($(NO_AVX), 1)
395-
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
401+
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
396402
endif
397403
ifneq ($(NO_AVX2), 1)
398404
DYNAMIC_CORE += HASWELL

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ Please read GotoBLAS_01Readme.txt
6060
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
6161
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
6262
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
63+
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
6364

6465
#### MIPS64:
6566
- **ICT Loongson 3A**: Optimized Level-3 BLAS and part of Level-1,2.

TargetList.txt

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ ISTANBUL
3232
BOBCAT
3333
BULLDOZER
3434
PILEDRIVER
35+
STEAMROLLER
3536

3637
c)VIA CPU:
3738
SSE_GENERIC

common_x86.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
171171
#define MMXSTORE movd
172172
#endif
173173

174-
#if defined(PILEDRIVER) || defined(BULLDOZER)
174+
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
175175
//Enable some optimization for barcelona.
176176
#define BARCELONA_OPTIMIZATION
177177
#endif

common_x86_64.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
226226

227227
#ifdef ASSEMBLER
228228

229-
#if defined(PILEDRIVER) || defined(BULLDOZER)
229+
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
230230
//Enable some optimization for barcelona.
231231
#define BARCELONA_OPTIMIZATION
232232
#endif

cpuid.h

+6-4
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,11 @@
104104
#define CORE_ATOM 18
105105
#define CORE_NANO 19
106106
#define CORE_SANDYBRIDGE 20
107-
#define CORE_BOBCAT 21
108-
#define CORE_BULLDOZER 22
107+
#define CORE_BOBCAT 21
108+
#define CORE_BULLDOZER 22
109109
#define CORE_PILEDRIVER 23
110-
#define CORE_HASWELL 24
110+
#define CORE_HASWELL 24
111+
#define CORE_STEAMROLLER 25
111112

112113
#define HAVE_SSE (1 << 0)
113114
#define HAVE_SSE2 (1 << 1)
@@ -200,6 +201,7 @@ typedef struct {
200201
#define CPUTYPE_BOBCAT 45
201202
#define CPUTYPE_BULLDOZER 46
202203
#define CPUTYPE_PILEDRIVER 47
203-
#define CPUTYPE_HASWELL 48
204+
#define CPUTYPE_HASWELL 48
205+
#define CPUTYPE_STEAMROLLER 49
204206

205207
#endif

cpuid_x86.c

+18
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,12 @@ int get_cpuname(void){
11621162
return CPUTYPE_PILEDRIVER;
11631163
else
11641164
return CPUTYPE_BARCELONA; //OS doesn't support AVX.
1165+
case 0:
1166+
if(support_avx())
1167+
return CPUTYPE_STEAMROLLER;
1168+
else
1169+
return CPUTYPE_BARCELONA; //OS doesn't support AVX.
1170+
11651171
}
11661172
break;
11671173
case 5:
@@ -1290,6 +1296,7 @@ static char *cpuname[] = {
12901296
"BULLDOZER",
12911297
"PILEDRIVER",
12921298
"HASWELL",
1299+
"STEAMROLLER",
12931300
};
12941301

12951302
static char *lowercpuname[] = {
@@ -1341,6 +1348,7 @@ static char *lowercpuname[] = {
13411348
"bulldozer",
13421349
"piledriver",
13431350
"haswell",
1351+
"steamroller",
13441352
};
13451353

13461354
static char *corename[] = {
@@ -1369,6 +1377,7 @@ static char *corename[] = {
13691377
"BULLDOZER",
13701378
"PILEDRIVER",
13711379
"HASWELL",
1380+
"STEAMROLLER",
13721381
};
13731382

13741383
static char *corename_lower[] = {
@@ -1397,6 +1406,7 @@ static char *corename_lower[] = {
13971406
"bulldozer",
13981407
"piledriver",
13991408
"haswell",
1409+
"steamroller",
14001410
};
14011411

14021412

@@ -1562,7 +1572,15 @@ int get_coretype(void){
15621572
return CORE_PILEDRIVER;
15631573
else
15641574
return CORE_BARCELONA; //OS doesn't support AVX.
1575+
1576+
case 0:
1577+
if(support_avx())
1578+
return CORE_STEAMROLLER;
1579+
else
1580+
return CORE_BARCELONA; //OS doesn't support AVX.
15651581
}
1582+
1583+
15661584
}else return CORE_BARCELONA;
15671585
}
15681586
}

driver/others/dynamic.c

+16-1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ extern gotoblas_t gotoblas_BOBCAT;
6666
extern gotoblas_t gotoblas_SANDYBRIDGE;
6767
extern gotoblas_t gotoblas_BULLDOZER;
6868
extern gotoblas_t gotoblas_PILEDRIVER;
69+
extern gotoblas_t gotoblas_STEAMROLLER;
6970
#ifdef NO_AVX2
7071
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
7172
#else
@@ -77,6 +78,7 @@ extern gotoblas_t gotoblas_HASWELL;
7778
#define gotoblas_HASWELL gotoblas_NEHALEM
7879
#define gotoblas_BULLDOZER gotoblas_BARCELONA
7980
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
81+
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
8082
#endif
8183

8284

@@ -275,7 +277,17 @@ static gotoblas_t *get_coretype(void){
275277
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
276278
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
277279
}
280+
}else if(model == 0){
281+
//AMD STEAMROLLER
282+
if(support_avx())
283+
return &gotoblas_STEAMROLLER;
284+
else{
285+
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
286+
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
287+
}
278288
}
289+
290+
279291
} else {
280292
return &gotoblas_BARCELONA;
281293
}
@@ -315,6 +327,7 @@ static char *corename[] = {
315327
"Bulldozer",
316328
"Piledriver",
317329
"Haswell",
330+
"Steamroller",
318331
};
319332

320333
char *gotoblas_corename(void) {
@@ -339,6 +352,7 @@ char *gotoblas_corename(void) {
339352
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
340353
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
341354
if (gotoblas == &gotoblas_HASWELL) return corename[20];
355+
if (gotoblas == &gotoblas_STEAMROLLER) return corename[21];
342356

343357
return corename[0];
344358
}
@@ -351,7 +365,7 @@ static gotoblas_t *force_coretype(char *coretype){
351365
char message[128];
352366
char mname[20];
353367

354-
for ( i=1 ; i <= 20; i++)
368+
for ( i=1 ; i <= 21; i++)
355369
{
356370
if (!strncasecmp(coretype,corename[i],20))
357371
{
@@ -370,6 +384,7 @@ static gotoblas_t *force_coretype(char *coretype){
370384
switch (found)
371385
{
372386

387+
case 21: return (&gotoblas_STEAMROLLER);
373388
case 20: return (&gotoblas_HASWELL);
374389
case 19: return (&gotoblas_PILEDRIVER);
375390
case 18: return (&gotoblas_BULLDOZER);

driver/others/parameter.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ int get_L2_size(void){
166166
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
167167
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
168168
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
169-
defined(PILEDRIVER) || defined(HASWELL)
169+
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER)
170170

171171
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
172172

@@ -251,7 +251,7 @@ void blas_set_parameter(void){
251251

252252
env_var_t p;
253253
int factor;
254-
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL)
254+
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER)
255255
int size = 16;
256256
#else
257257
int size = get_L2_size();

getarch.c

+17
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
432432
#define CORENAME "PILEDRIVER"
433433
#endif
434434

435+
#if defined (FORCE_STEAMROLLER)
436+
#define FORCE
437+
#define FORCE_INTEL
438+
#define ARCHITECTURE "X86"
439+
#define SUBARCHITECTURE "STEAMROLLER"
440+
#define ARCHCONFIG "-DSTEAMROLLER " \
441+
"-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
442+
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL3_SIZE=12582912 " \
443+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
444+
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
445+
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
446+
"-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
447+
#define LIBNAME "steamroller"
448+
#define CORENAME "STEAMROLLER"
449+
#endif
450+
451+
435452
#ifdef FORCE_SSE_GENERIC
436453
#define FORCE
437454
#define FORCE_INTEL

kernel/setparam-ref.c

+17
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,23 @@ static void init_parameter(void) {
941941
#endif
942942
#endif
943943

944+
#ifdef STEAMROLLER
945+
946+
#ifdef DEBUG
947+
fprintf(stderr, "Steamroller\n");
948+
#endif
949+
950+
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
951+
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
952+
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
953+
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
954+
#ifdef EXPRECISION
955+
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
956+
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
957+
#endif
958+
#endif
959+
960+
944961
#ifdef NANO
945962

946963
#ifdef DEBUG

kernel/x86_64/KERNEL.STEAMROLLER

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
SGEMVNKERNEL = sgemv_n_4.c
2+
SGEMVTKERNEL = sgemv_t_4.c
3+
4+
ZGEMVNKERNEL = zgemv_n_dup.S
5+
ZGEMVTKERNEL = zgemv_t_4.c
6+
7+
DGEMVNKERNEL = dgemv_n_bulldozer.S
8+
DGEMVTKERNEL = dgemv_t_bulldozer.S
9+
10+
DDOTKERNEL = ddot_bulldozer.S
11+
DCOPYKERNEL = dcopy_bulldozer.S
12+
13+
SGEMMKERNEL = sgemm_kernel_16x2_piledriver.S
14+
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
15+
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
16+
SGEMMONCOPY = gemm_ncopy_2_bulldozer.S
17+
SGEMMOTCOPY = gemm_tcopy_2_bulldozer.S
18+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
19+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
20+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
21+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
22+
23+
DGEMMKERNEL = dgemm_kernel_8x2_piledriver.S
24+
DGEMMINCOPY = dgemm_ncopy_8_bulldozer.S
25+
DGEMMITCOPY = dgemm_tcopy_8_bulldozer.S
26+
DGEMMONCOPY = gemm_ncopy_2_bulldozer.S
27+
DGEMMOTCOPY = gemm_tcopy_2_bulldozer.S
28+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
29+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
30+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
31+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
32+
33+
CGEMMKERNEL = cgemm_kernel_4x2_piledriver.S
34+
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
35+
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
36+
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
37+
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
38+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
39+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
40+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
41+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
42+
ZGEMMKERNEL = zgemm_kernel_2x2_piledriver.S
43+
ZGEMMINCOPY =
44+
ZGEMMITCOPY =
45+
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
46+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
47+
ZGEMMINCOPYOBJ =
48+
ZGEMMITCOPYOBJ =
49+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
50+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
51+
52+
CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S
53+
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S
54+
55+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
56+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
57+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
58+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
59+
60+
61+
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
62+
DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S
63+
DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S
64+
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
65+
66+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
67+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
68+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
69+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
70+
71+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
72+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
73+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
74+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
75+
76+

kernel/x86_64/ddot.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
#include "common.h"
3030

3131

32-
#if defined(BULLDOZER) || defined(PILEDRIVER)
32+
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
3333
#include "ddot_microk_bulldozer-2.c"
3434
#elif defined(NEHALEM)
3535
#include "ddot_microk_nehalem-2.c"

kernel/x86_64/sdot.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828

2929
#include "common.h"
3030

31-
#if defined(BULLDOZER) || defined(PILEDRIVER)
31+
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
3232
#include "sdot_microk_bulldozer-2.c"
3333
#elif defined(NEHALEM)
3434
#include "sdot_microk_nehalem-2.c"

kernel/x86_64/sgemv_n_4.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
#include "common.h"
3030

3131

32-
#if defined(BULLDOZER) || defined(PILEDRIVER)
32+
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
3333
#include "sgemv_n_microk_bulldozer-4.c"
3434
#elif defined(NEHALEM)
3535
#include "sgemv_n_microk_nehalem-4.c"

kernel/x86_64/sgemv_t_4.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3030

3131
#if defined(NEHALEM)
3232
#include "sgemv_t_microk_nehalem-4.c"
33-
#elif defined(BULLDOZER) || defined(PILEDRIVER)
33+
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
3434
#include "sgemv_t_microk_bulldozer-4.c"
3535
#elif defined(SANDYBRIDGE)
3636
#include "sgemv_t_microk_sandy-4.c"

0 commit comments

Comments
 (0)