Skip to content

Commit 547d8b1

Browse files
committed
1 parent a060814 commit 547d8b1

File tree

4 files changed

+571
-0
lines changed

4 files changed

+571
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,221 @@
1+
From 6497aae57c77253b2d717b01f5ec17e137954395 Mon Sep 17 00:00:00 2001
2+
From: Martin Kroeker <[email protected]>
3+
Date: Wed, 12 Jul 2017 20:43:09 +0200
4+
Subject: [PATCH] Use cpuid 4 with subleafs to query L1 cache size on Intel
5+
processors
6+
7+
---
8+
cpuid_x86.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------
9+
1 file changed, 102 insertions(+), 15 deletions(-)
10+
11+
diff --git a/cpuid_x86.c b/cpuid_x86.c
12+
index ab2ecdcaf..73b4df6b3 100644
13+
--- a/cpuid_x86.c
14+
+++ b/cpuid_x86.c
15+
@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
16+
*edx = cpuInfo[3];
17+
}
18+
19+
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
20+
+{
21+
+ int cpuInfo[4] = {-1};
22+
+ __cpuidex(cpuInfo, op, count);
23+
+ *eax = cpuInfo[0];
24+
+ *ebx = cpuInfo[1];
25+
+ *ecx = cpuInfo[2];
26+
+ *edx = cpuInfo[3];
27+
+}
28+
+
29+
#else
30+
31+
#ifndef CPUIDEMU
32+
33+
#if defined(__APPLE__) && defined(__i386__)
34+
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
35+
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
36+
#else
37+
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
38+
#if defined(__i386__) && defined(__PIC__)
39+
@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
40+
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
41+
#endif
42+
}
43+
+
44+
+static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
45+
+#if defined(__i386__) && defined(__PIC__)
46+
+ __asm__ __volatile__
47+
+ ("mov %%ebx, %%edi;"
48+
+ "cpuid;"
49+
+ "xchgl %%ebx, %%edi;"
50+
+ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
51+
+#else
52+
+ __asm__ __volatile__
53+
+ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
54+
+#endif
55+
+}
56+
#endif
57+
58+
#else
59+
@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
60+
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
61+
62+
if (cpuid_level > 1) {
63+
-
64+
+ int numcalls =0 ;
65+
cpuid(2, &eax, &ebx, &ecx, &edx);
66+
-
67+
+ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
68+
info[ 0] = BITMASK(eax, 8, 0xff);
69+
info[ 1] = BITMASK(eax, 16, 0xff);
70+
info[ 2] = BITMASK(eax, 24, 0xff);
71+
@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
72+
info[14] = BITMASK(edx, 24, 0xff);
73+
74+
for (i = 0; i < 15; i++){
75+
-
76+
switch (info[i]){
77+
78+
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
79+
@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
80+
LD1.linesize = 64;
81+
break;
82+
case 0x63 :
83+
- DTB.size = 2048;
84+
- DTB.associative = 4;
85+
- DTB.linesize = 32;
86+
- LDTB.size = 4096;
87+
- LDTB.associative= 4;
88+
- LDTB.linesize = 32;
89+
+ DTB.size = 2048;
90+
+ DTB.associative = 4;
91+
+ DTB.linesize = 32;
92+
+ LDTB.size = 4096;
93+
+ LDTB.associative= 4;
94+
+ LDTB.linesize = 32;
95+
+ break;
96+
case 0x66 :
97+
LD1.size = 8;
98+
LD1.associative = 4;
99+
@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
100+
LC1.associative = 8;
101+
break;
102+
case 0x76 :
103+
- ITB.size = 2048;
104+
- ITB.associative = 0;
105+
- ITB.linesize = 8;
106+
- LITB.size = 4096;
107+
- LITB.associative= 0;
108+
- LITB.linesize = 8;
109+
+ ITB.size = 2048;
110+
+ ITB.associative = 0;
111+
+ ITB.linesize = 8;
112+
+ LITB.size = 4096;
113+
+ LITB.associative= 0;
114+
+ LITB.linesize = 8;
115+
+ break;
116+
case 0x77 :
117+
LC1.size = 16;
118+
LC1.associative = 4;
119+
@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
120+
}
121+
122+
if (get_vendor() == VENDOR_INTEL) {
123+
+ if(LD1.size<=0 || LC1.size<=0){
124+
+ //If we didn't detect L1 correctly before, query cpuid leaf 4 subleafs for the L1 data/instruction cache parameters
125+
+ int count;
126+
+ for (count=0;count <4;count++) {
127+
+ cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
128+
+ switch (eax &0x1f) {
129+
+ case 0:
130+
+ continue;
131+
+ case 1:
132+
+ case 3:
133+
+ {
134+
+ switch ((eax >>5) &0x07)
135+
+ {
136+
+ case 1:
137+
+ {
138+
+// fprintf(stderr,"L1 data cache...\n");
139+
+ int sets = ecx+1;
140+
+ int lines = (ebx & 0x0fff) +1;
141+
+ ebx>>=12;
142+
+ int part = (ebx&0x03ff)+1;
143+
+ ebx >>=10;
144+
+ int assoc = (ebx&0x03ff)+1;
145+
+ LD1.size = (assoc*part*lines*sets)/1024;
146+
+ LD1.associative = assoc;
147+
+ LD1.linesize= lines;
148+
+ break;
149+
+ }
150+
+ default:
151+
+ break;
152+
+ }
153+
+ break;
154+
+ }
155+
+ case 2:
156+
+ {
157+
+ switch ((eax >>5) &0x07)
158+
+ {
159+
+ case 1:
160+
+ {
161+
+// fprintf(stderr,"L1 instruction cache...\n");
162+
+ int sets = ecx+1;
163+
+ int lines = (ebx & 0x0fff) +1;
164+
+ ebx>>=12;
165+
+ int part = (ebx&0x03ff)+1;
166+
+ ebx >>=10;
167+
+ int assoc = (ebx&0x03ff)+1;
168+
+ LC1.size = (assoc*part*lines*sets)/1024;
169+
+ LC1.associative = assoc;
170+
+ LC1.linesize= lines;
171+
+ break;
172+
+ }
173+
+ default:
174+
+ break;
175+
+ }
176+
+ break;
177+
+
178+
+ }
179+
+ default:
180+
+ break;
181+
+ }
182+
+ }
183+
+ }
184+
+
185+
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
186+
if (cpuid_level >= 0x80000006) {
187+
if(L2.size<=0){
188+
189+
From 00774b1105ad5dbfe0e6be671096d51ad4a97b2e Mon Sep 17 00:00:00 2001
190+
From: Martin Kroeker <[email protected]>
191+
Date: Wed, 12 Jul 2017 21:56:23 +0200
192+
Subject: [PATCH] Add dummy implementation of cpuid_count for the CPUIDEMU case
193+
194+
---
195+
cpuid_x86.c | 5 ++++-
196+
1 file changed, 4 insertions(+), 1 deletion(-)
197+
198+
diff --git a/cpuid_x86.c b/cpuid_x86.c
199+
index 73b4df6b3..103128a33 100644
200+
--- a/cpuid_x86.c
201+
+++ b/cpuid_x86.c
202+
@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
203+
*edx = idlist[current].d;
204+
}
205+
206+
+void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
207+
+ return cpuid (op, eax, ebx, ecx, edx);
208+
+}
209+
+
210+
#endif
211+
212+
#endif // _MSC_VER
213+
@@ -977,7 +981,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
214+
}
215+
}
216+
}
217+
-
218+
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
219+
if (cpuid_level >= 0x80000006) {
220+
if(L2.size<=0){
221+
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,144 @@
1+
From 88a35ff457f55e527e0e8a503a0dc61976c1846d Mon Sep 17 00:00:00 2001
2+
From: Martin Kroeker <[email protected]>
3+
Date: Tue, 25 Jul 2017 08:39:35 +0200
4+
Subject: [PATCH] Revert #1246, "honor cgroup/cpuset limits" for now
5+
6+
Unsafe usage of the __GLIBC_PREREQ macro led to build breakage on non-glibc systems
7+
---
8+
driver/others/init.c | 49 +++++--------------------------------------------
9+
driver/others/memory.c | 37 -------------------------------------
10+
2 files changed, 5 insertions(+), 81 deletions(-)
11+
12+
diff --git a/driver/others/init.c b/driver/others/init.c
13+
index 4c75d72e4..3e6176967 100644
14+
--- a/driver/others/init.c
15+
+++ b/driver/others/init.c
16+
@@ -778,11 +778,11 @@ static int initialized = 0;
17+
void gotoblas_affinity_init(void) {
18+
19+
int cpu, num_avail;
20+
-#ifndef USE_OPENMP
21+
+#ifndef USE_OPENMP
22+
cpu_set_t cpu_mask;
23+
#endif
24+
int i;
25+
-
26+
+
27+
if (initialized) return;
28+
29+
initialized = 1;
30+
@@ -826,54 +826,15 @@ void gotoblas_affinity_init(void) {
31+
common -> shmid = pshmid;
32+
33+
if (common -> magic != SH_MAGIC) {
34+
- cpu_set_t *cpusetp;
35+
- int nums;
36+
- int ret;
37+
-
38+
#ifdef DEBUG
39+
fprintf(stderr, "Shared Memory Initialization.\n");
40+
#endif
41+
42+
//returns the number of processors which are currently online
43+
-
44+
- nums = sysconf(_SC_NPROCESSORS_CONF);
45+
-
46+
-#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3)
47+
- common->num_procs = nums;
48+
-#elif __GLIBC_PREREQ(2, 7)
49+
- cpusetp = CPU_ALLOC(nums);
50+
- if (cpusetp == NULL) {
51+
- common->num_procs = nums;
52+
- } else {
53+
- size_t size;
54+
- size = CPU_ALLOC_SIZE(nums);
55+
- ret = sched_getaffinity(0,size,cpusetp);
56+
- if (ret!=0)
57+
- common->num_procs = nums;
58+
- else
59+
- common->num_procs = CPU_COUNT_S(size,cpusetp);
60+
- }
61+
- CPU_FREE(cpusetp);
62+
-#else
63+
- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
64+
- if (ret!=0) {
65+
- common->num_procs = nums;
66+
- } else {
67+
-#if !__GLIBC_PREREQ(2, 6)
68+
- int i;
69+
- int n = 0;
70+
- for (i=0;i<nums;i++)
71+
- if (CPU_ISSET(i,cpusetp)) n++;
72+
- common->num_procs = n;
73+
- }
74+
-#else
75+
- common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
76+
-#endif
77+
-
78+
-#endif
79+
+ common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);;
80+
81+
if(common -> num_procs > MAX_CPUS) {
82+
- fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
83+
+ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
84+
exit(1);
85+
}
86+
87+
@@ -886,7 +847,7 @@ void gotoblas_affinity_init(void) {
88+
if (common -> num_nodes > 1) numa_mapping();
89+
90+
common -> final_num_procs = 0;
91+
- for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
92+
+ for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
93+
94+
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
95+
96+
diff --git a/driver/others/memory.c b/driver/others/memory.c
97+
index 38d063715..916950315 100644
98+
--- a/driver/others/memory.c
99+
+++ b/driver/others/memory.c
100+
@@ -175,44 +175,7 @@ int get_num_procs(void);
101+
#else
102+
int get_num_procs(void) {
103+
static int nums = 0;
104+
-cpu_set_t *cpusetp;
105+
-size_t size;
106+
-int ret;
107+
-int i,n;
108+
-
109+
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
110+
-#if !defined(OS_LINUX)
111+
- return nums;
112+
-#endif
113+
-
114+
-#if !defined(__GLIBC_PREREQ)
115+
- return nums;
116+
-#endif
117+
-#if !__GLIBC_PREREQ(2, 3)
118+
- return nums;
119+
-#endif
120+
-
121+
-#if !__GLIBC_PREREQ(2, 7)
122+
- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
123+
- if (ret!=0) return nums;
124+
- n=0;
125+
-#if !__GLIBC_PREREQ(2, 6)
126+
- for (i=0;i<nums;i++)
127+
- if (CPU_ISSET(i,cpusetp)) n++;
128+
- nums=n;
129+
-#else
130+
- nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
131+
-#endif
132+
- return nums;
133+
-#endif
134+
-
135+
- cpusetp = CPU_ALLOC(nums);
136+
- if (cpusetp == NULL) return nums;
137+
- size = CPU_ALLOC_SIZE(nums);
138+
- ret = sched_getaffinity(0,size,cpusetp);
139+
- if (ret!=0) return nums;
140+
- nums = CPU_COUNT_S(size,cpusetp);
141+
- CPU_FREE(cpusetp);
142+
return nums;
143+
}
144+
#endif

0 commit comments

Comments
 (0)