File tree: 4 files changed, +21 -25 lines changed
Original file line number | Diff line number | Diff line change
@@ -144,24 +144,23 @@ __global__ void gpu_coulomb_fock(
144
144
145
145
prefactor_mo = (double )(cc * PI52 * ovlap) / zeta;
146
146
}
147
- __shared__ uint term_start[3 ];
147
+ uint term_start[3 ];
148
148
term_start[0 ] = 0 ;
149
149
term_start[1 ] = p_offset;
150
150
term_start[2 ] = d_offset;
151
- __shared__ uint term_end[3 ];
151
+ uint term_end[3 ];
152
152
term_end[0 ] = s_end;
153
153
term_end[1 ] = p_end;
154
154
term_end[2 ] = d_end;
155
- __shared__ uint inner_stop[3 ];
155
+ uint inner_stop[3 ];
156
156
inner_stop[0 ] = QMMM_BLOCK_SIZE;
157
157
inner_stop[1 ] = 126 ;
158
158
inner_stop[2 ] = 126 ;
159
- __shared__ uint inner_step[3 ];
159
+ uint inner_step[3 ];
160
160
inner_step[0 ] = 1 ;
161
161
inner_step[1 ] = 3 ;
162
162
inner_step[2 ] = 6 ;
163
163
164
- #pragma unroll 3
165
164
for (int func_type = 0 ; func_type < 3 ; func_type++) {
166
165
//
167
166
// Outer loop: read in block of MM atom information into shared memory
Original file line number | Diff line number | Diff line change
@@ -187,25 +187,24 @@ __global__ void gpu_coulomb_fit1(uint num_terms,
187
187
188
188
prefactor_mo = (double )(cc * PI52 * ovlap) / zeta;
189
189
}
190
- __shared__ uint term_start[3 ];
190
+ uint term_start[3 ];
191
191
term_start[0 ] = 0 ;
192
192
term_start[1 ] = p_offset;
193
193
term_start[2 ] = d_offset;
194
- __shared__ uint term_end[3 ];
194
+ uint term_end[3 ];
195
195
term_end[0 ] = s_end;
196
196
term_end[1 ] = p_end;
197
197
term_end[2 ] = d_end;
198
- __shared__ uint inner_stop[3 ];
198
+ uint inner_stop[3 ];
199
199
inner_stop[0 ] = QMMM_BLOCK_SIZE;
200
200
inner_stop[1 ] = 126 ;
201
201
inner_stop[2 ] = 126 ;
202
- __shared__ uint inner_step[3 ];
202
+ uint inner_step[3 ];
203
203
inner_step[0 ] = 1 ;
204
204
inner_step[1 ] = 3 ;
205
205
inner_step[2 ] = 6 ;
206
206
207
207
uint rc_ind = 0 ;
208
- #pragma unroll 3
209
208
for (int func_type = 0 ; func_type < 3 ; func_type++) {
210
209
//
211
210
// Outer loop: read in block of MM atom information into shared memory
Original file line number | Diff line number | Diff line change
@@ -210,24 +210,23 @@ __global__ void gpu_coulomb_forces(
210
210
211
211
prefactor_mo = (double )(cc * PI52 * ovlap) / zeta;
212
212
}
213
- __shared__ uint term_start[3 ];
213
+ uint term_start[3 ];
214
214
term_start[0 ] = 0 ;
215
215
term_start[1 ] = p_offset;
216
216
term_start[2 ] = d_offset;
217
- __shared__ uint term_end[3 ];
217
+ uint term_end[3 ];
218
218
term_end[0 ] = s_end;
219
219
term_end[1 ] = p_end;
220
220
term_end[2 ] = d_end;
221
- __shared__ uint inner_stop[3 ];
221
+ uint inner_stop[3 ];
222
222
inner_stop[0 ] = QMMM_BLOCK_SIZE;
223
223
inner_stop[1 ] = 126 ;
224
224
inner_stop[2 ] = 126 ;
225
- __shared__ uint inner_step[3 ];
225
+ uint inner_step[3 ];
226
226
inner_step[0 ] = 1 ;
227
227
inner_step[1 ] = 3 ;
228
228
inner_step[2 ] = 6 ;
229
229
230
- #pragma unroll 3
231
230
for (int func_type = 0 ; func_type < 3 ; func_type++) {
232
231
//
233
232
// Outer loop: read in block of MM atom information into shared memory
Original file line number | Diff line number | Diff line change
@@ -187,22 +187,21 @@ void OSIntegral<scalar_type>::new_cutoff(void) {
187
187
}
188
188
}
189
189
// Pad the input arrays so the next term type has an aligned offset
190
- for (j = 0 ; j < QMMM_BLOCK_SIZE -
191
- (term_type_counts[current_term_type] % QMMM_BLOCK_SIZE);
192
- j++) {
193
- this ->func_code .push_back (
194
- func_code[term_type_offsets[current_term_type]]); // Use the first
195
- // code from this
196
- // term type
197
- this ->local_dens .push_back (
198
- local_dens[term_type_offsets[current_term_type]]);
199
- }
200
190
if (term_type_counts[current_term_type] > 0 ) {
191
+ for (j = 0 ; j < QMMM_BLOCK_SIZE - (term_type_counts[current_term_type] % QMMM_BLOCK_SIZE); j++) {
192
+ this ->func_code .push_back (func_code[term_type_offsets[current_term_type]]);
193
+ // Use the first code from this term type
194
+ this ->local_dens .push_back (local_dens[term_type_offsets[current_term_type]]);
195
+ }
201
196
for (j = 0 ; j < QMMM_BLOCK_SIZE - (dens_counts[current_term_type] % QMMM_BLOCK_SIZE); j++) {
202
197
this ->dens_values .push_back (dens_values[dens_offsets[current_term_type]]);
203
198
this ->local2globaldens .push_back (local2globaldens[dens_offsets[current_term_type]]);
204
199
}
205
200
} else {
201
+ for (j = 0 ; j < QMMM_BLOCK_SIZE - (dens_counts[current_term_type] % QMMM_BLOCK_SIZE); j++) {
202
+ this ->func_code .push_back (0 );
203
+ this ->local_dens .push_back (0 );
204
+ }
206
205
for (j = 0 ; j < QMMM_BLOCK_SIZE - (dens_counts[current_term_type] % QMMM_BLOCK_SIZE); j++) {
207
206
this ->dens_values .push_back (0 .0f );
208
207
this ->local2globaldens .push_back (0 );
You can’t perform that action at this time.
0 commit comments