2
2
#include "linsys.h"
3
3
4
4
/* In case of error abort freeing p */
5
- #define CUDSS_CHECK_ABORT (call , p , fname ) \
6
- do \
7
- { \
8
- cudssStatus_t status = call; \
9
- if (status != CUDSS_STATUS_SUCCESS) \
10
- { \
11
- scs_printf("CUDSS call " #fname " returned status = %d\n", status); \
12
- scs_free_lin_sys_work(p); \
13
- return SCS_NULL; \
14
- } \
5
+ #define CUDSS_CHECK_ABORT (call , p , fname ) \
6
+ do { \
7
+ cudssStatus_t status = call; \
8
+ if (status != CUDSS_STATUS_SUCCESS) { \
9
+ scs_printf("CUDSS call " #fname " returned status = %d\n", status); \
10
+ scs_free_lin_sys_work(p); \
11
+ return SCS_NULL; \
12
+ } \
15
13
} while (0);
16
14
17
15
/* In case of error abort freeing p */
18
- #define CUDA_CHECK_ABORT (call , p , fname ) \
19
- do \
20
- { \
21
- cudaError_t status = call; \
22
- if (status != cudaSuccess) \
23
- { \
24
- printf("CUDA call " #fname " returned status = %d\n", status); \
25
- scs_free_lin_sys_work(p); \
26
- return SCS_NULL; \
27
- } \
16
+ #define CUDA_CHECK_ABORT (call , p , fname ) \
17
+ do { \
18
+ cudaError_t status = call; \
19
+ if (status != cudaSuccess) { \
20
+ printf("CUDA call " #fname " returned status = %d\n", status); \
21
+ scs_free_lin_sys_work(p); \
22
+ return SCS_NULL; \
23
+ } \
28
24
} while (0);
29
25
30
26
/* Return the linear system method name */
31
- const char * scs_get_lin_sys_method ()
32
- {
27
+ const char * scs_get_lin_sys_method () {
33
28
return "sparse-direct-cuDSS" ;
34
29
}
35
30
36
31
/* Free allocated resources for the linear system solver */
37
- void scs_free_lin_sys_work (ScsLinSysWork * p )
38
- {
39
- if (p )
40
- {
32
+ void scs_free_lin_sys_work (ScsLinSysWork * p ) {
33
+ if (p ) {
41
34
/* Free GPU resources */
42
35
if (p -> d_kkt_val )
43
36
cudaFree (p -> d_kkt_val );
@@ -81,8 +74,7 @@ void scs_free_lin_sys_work(ScsLinSysWork *p)
81
74
82
75
/* Initialize the linear system solver workspace */
83
76
ScsLinSysWork * scs_init_lin_sys_work (const ScsMatrix * A , const ScsMatrix * P ,
84
- const scs_float * diag_r )
85
- {
77
+ const scs_float * diag_r ) {
86
78
ScsLinSysWork * p = scs_calloc (1 , sizeof (ScsLinSysWork ));
87
79
if (!p )
88
80
return SCS_NULL ;
@@ -94,31 +86,27 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
94
86
95
87
/* Allocate CPU memory */
96
88
p -> sol = (scs_float * )scs_malloc (sizeof (scs_float ) * p -> n_plus_m );
97
- if (!p -> sol )
98
- {
89
+ if (!p -> sol ) {
99
90
scs_free_lin_sys_work (p );
100
91
return SCS_NULL ;
101
92
}
102
93
103
94
p -> diag_r_idxs = (scs_int * )scs_calloc (p -> n_plus_m , sizeof (scs_int ));
104
- if (!p -> diag_r_idxs )
105
- {
95
+ if (!p -> diag_r_idxs ) {
106
96
scs_free_lin_sys_work (p );
107
97
return SCS_NULL ;
108
98
}
109
99
110
100
p -> diag_p = (scs_float * )scs_calloc (p -> n , sizeof (scs_float ));
111
- if (!p -> diag_p )
112
- {
101
+ if (!p -> diag_p ) {
113
102
scs_free_lin_sys_work (p );
114
103
return SCS_NULL ;
115
104
}
116
105
117
106
/* Form KKT matrix as upper-triangular, CSC */
118
107
/* Because of symmetry it is equivalent to lower-triangular, CSR */
119
108
p -> kkt = SCS (form_kkt )(A , P , p -> diag_p , diag_r , p -> diag_r_idxs , 1 );
120
- if (!p -> kkt )
121
- {
109
+ if (!p -> kkt ) {
122
110
scs_printf ("Error in forming KKT matrix" );
123
111
scs_free_lin_sys_work (p );
124
112
return SCS_NULL ;
@@ -131,115 +119,128 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
131
119
CUDSS_CHECK_ABORT (cudssCreate (& p -> handle ), p , "cudssCreate" );
132
120
/* Creating cuDSS solver configuration and data objects */
133
121
134
- CUDSS_CHECK_ABORT (cudssConfigCreate (& p -> solver_config ), p , "cudssConfigCreate" );
135
- CUDSS_CHECK_ABORT (cudssDataCreate (p -> handle , & p -> solver_data ), p , "cudssDataCreate" );
122
+ CUDSS_CHECK_ABORT (cudssConfigCreate (& p -> solver_config ), p ,
123
+ "cudssConfigCreate" );
124
+ CUDSS_CHECK_ABORT (cudssDataCreate (p -> handle , & p -> solver_data ), p ,
125
+ "cudssDataCreate" );
136
126
137
127
/* Allocate device memory for KKT matrix */
138
128
scs_int nnz = p -> kkt -> p [p -> n_plus_m ];
139
129
140
- CUDA_CHECK_ABORT (cudaMalloc ((void * * )& p -> d_kkt_val , nnz * sizeof (scs_float )), p , "cudaMalloc: kkt_val" );
141
- CUDA_CHECK_ABORT (cudaMalloc ((void * * )& p -> d_kkt_row_ptr , (p -> n_plus_m + 1 ) * sizeof (scs_int )), p , "cudaMalloc: kkt_row_ptr" );
142
- CUDA_CHECK_ABORT (cudaMalloc ((void * * )& p -> d_kkt_col_ind , nnz * sizeof (scs_int )), p , "cudaMalloc: kkt_col_ind" );
130
+ CUDA_CHECK_ABORT (cudaMalloc ((void * * )& p -> d_kkt_val , nnz * sizeof (scs_float )),
131
+ p , "cudaMalloc: kkt_val" );
132
+ CUDA_CHECK_ABORT (cudaMalloc ((void * * )& p -> d_kkt_row_ptr ,
133
+ (p -> n_plus_m + 1 ) * sizeof (scs_int )),
134
+ p , "cudaMalloc: kkt_row_ptr" );
135
+ CUDA_CHECK_ABORT (
136
+ cudaMalloc ((void * * )& p -> d_kkt_col_ind , nnz * sizeof (scs_int )), p ,
137
+ "cudaMalloc: kkt_col_ind" );
143
138
144
139
/* Copy KKT matrix to device */
145
140
/* Note: we treat column pointers (p->kkt->p) as row pointers on the device */
146
141
CUDA_CHECK_ABORT (cudaMemcpy (p -> d_kkt_val , p -> kkt -> x , nnz * sizeof (scs_float ),
147
142
cudaMemcpyHostToDevice ),
148
143
p , "cudaMemcpy: kkt_val" );
149
- CUDA_CHECK_ABORT (cudaMemcpy (p -> d_kkt_row_ptr , p -> kkt -> p , (p -> kkt -> n + 1 ) * sizeof (scs_int ),
144
+ CUDA_CHECK_ABORT (cudaMemcpy (p -> d_kkt_row_ptr , p -> kkt -> p ,
145
+ (p -> kkt -> n + 1 ) * sizeof (scs_int ),
150
146
cudaMemcpyHostToDevice ),
151
147
p , "cudaMemcpy: kkt_row_ptr" );
152
- CUDA_CHECK_ABORT (cudaMemcpy (p -> d_kkt_col_ind , p -> kkt -> i , nnz * sizeof ( scs_int ),
153
- cudaMemcpyHostToDevice ),
148
+ CUDA_CHECK_ABORT (cudaMemcpy (p -> d_kkt_col_ind , p -> kkt -> i ,
149
+ nnz * sizeof ( scs_int ), cudaMemcpyHostToDevice ),
154
150
p , "cudaMemcpy: kkt_col_ind" );
155
151
156
152
/* Create kkt matrix descriptor */
157
153
/* We pass the kkt matrix as symmetric, lower triangular */
158
154
cudssMatrixType_t mtype = CUDSS_MTYPE_SYMMETRIC ;
159
155
cudssMatrixViewType_t mview = CUDSS_MVIEW_LOWER ;
160
156
cudssIndexBase_t base = CUDSS_BASE_ZERO ;
161
- CUDSS_CHECK_ABORT (cudssMatrixCreateCsr (& p -> d_kkt_mat , p -> kkt -> m , p -> kkt -> n , nnz , p -> d_kkt_row_ptr ,
162
- NULL , p -> d_kkt_col_ind , p -> d_kkt_val , SCS_CUDA_INDEX , SCS_CUDA_FLOAT ,
163
- mtype , mview , base ),
157
+ CUDSS_CHECK_ABORT (cudssMatrixCreateCsr (
158
+ & p -> d_kkt_mat , p -> kkt -> m , p -> kkt -> n , nnz ,
159
+ p -> d_kkt_row_ptr , NULL , p -> d_kkt_col_ind , p -> d_kkt_val ,
160
+ SCS_CUDA_INDEX , SCS_CUDA_FLOAT , mtype , mview , base ),
164
161
p , "cudssMatrixCreateCsr" );
165
162
166
163
/* Allocate device memory for vectors */
167
- CUDA_CHECK_ABORT (cudaMalloc ((void * * )& p -> d_b , p -> n_plus_m * sizeof (scs_float )), p , "cudaMalloc: b" );
168
- CUDA_CHECK_ABORT (cudaMalloc ((void * * )& p -> d_sol , p -> n_plus_m * sizeof (scs_float )), p , "cudaMalloc: sol" );
164
+ CUDA_CHECK_ABORT (
165
+ cudaMalloc ((void * * )& p -> d_b , p -> n_plus_m * sizeof (scs_float )), p ,
166
+ "cudaMalloc: b" );
167
+ CUDA_CHECK_ABORT (
168
+ cudaMalloc ((void * * )& p -> d_sol , p -> n_plus_m * sizeof (scs_float )), p ,
169
+ "cudaMalloc: sol" );
169
170
170
171
/* Create RHS and solution matrix descriptors */
171
172
scs_int nrhs = 1 ;
172
- CUDSS_CHECK_ABORT (cudssMatrixCreateDn (& p -> d_b_mat , p -> n_plus_m , nrhs , p -> n_plus_m , p -> d_b ,
173
- SCS_CUDA_FLOAT , CUDSS_LAYOUT_COL_MAJOR ),
173
+ CUDSS_CHECK_ABORT (cudssMatrixCreateDn (& p -> d_b_mat , p -> n_plus_m , nrhs ,
174
+ p -> n_plus_m , p -> d_b , SCS_CUDA_FLOAT ,
175
+ CUDSS_LAYOUT_COL_MAJOR ),
174
176
p , "cudssMatrixCreateDn: b" );
175
- CUDSS_CHECK_ABORT (cudssMatrixCreateDn (& p -> d_sol_mat , p -> n_plus_m , nrhs , p -> n_plus_m , p -> d_sol ,
176
- SCS_CUDA_FLOAT , CUDSS_LAYOUT_COL_MAJOR ),
177
+ CUDSS_CHECK_ABORT (cudssMatrixCreateDn (& p -> d_sol_mat , p -> n_plus_m , nrhs ,
178
+ p -> n_plus_m , p -> d_sol , SCS_CUDA_FLOAT ,
179
+ CUDSS_LAYOUT_COL_MAJOR ),
177
180
p , "cudssMatrixCreateDn: sol" );
178
181
179
182
/* Symbolic factorization */
180
- CUDSS_CHECK_ABORT (cudssExecute (p -> handle , CUDSS_PHASE_ANALYSIS , p -> solver_config , p -> solver_data ,
181
- p -> d_kkt_mat , p -> d_sol_mat , p -> d_b_mat ),
183
+ CUDSS_CHECK_ABORT (cudssExecute (p -> handle , CUDSS_PHASE_ANALYSIS ,
184
+ p -> solver_config , p -> solver_data , p -> d_kkt_mat ,
185
+ p -> d_sol_mat , p -> d_b_mat ),
182
186
p , "cudssExecute: analysis" );
183
187
184
188
/* Numerical Factorization */
185
- CUDSS_CHECK_ABORT (cudssExecute (p -> handle , CUDSS_PHASE_FACTORIZATION , p -> solver_config , p -> solver_data ,
186
- p -> d_kkt_mat , p -> d_sol_mat , p -> d_b_mat ),
189
+ CUDSS_CHECK_ABORT (cudssExecute (p -> handle , CUDSS_PHASE_FACTORIZATION ,
190
+ p -> solver_config , p -> solver_data , p -> d_kkt_mat ,
191
+ p -> d_sol_mat , p -> d_b_mat ),
187
192
p , "cudssExecute: factorization" );
188
193
189
194
return p ;
190
195
}
191
196
192
197
/* Solve the linear system for a given RHS b */
193
198
scs_int scs_solve_lin_sys (ScsLinSysWork * p , scs_float * b , const scs_float * ws ,
194
- scs_float tol )
195
- {
199
+ scs_float tol ) {
196
200
/* Copy right-hand side to device */
197
201
cudaError_t custatus = cudaMemcpy (p -> d_b , b , p -> n_plus_m * sizeof (scs_float ),
198
202
cudaMemcpyHostToDevice );
199
- if (custatus != cudaSuccess )
200
- {
201
- scs_printf ( "scs_solve_lin_sys: Error copying `b` side to device: %d\n" , (int )custatus );
203
+ if (custatus != cudaSuccess ) {
204
+ scs_printf ( "scs_solve_lin_sys: Error copying `b` side to device: %d\n" ,
205
+ (int )custatus );
202
206
return custatus ;
203
207
}
204
208
205
209
// is this really needed?
206
210
cudssMatrixSetValues (p -> d_b_mat , p -> d_b );
207
211
208
212
/* Solve the system */
209
- cudssStatus_t status = cudssExecute (p -> handle , CUDSS_PHASE_SOLVE , p -> solver_config , p -> solver_data ,
210
- p -> d_kkt_mat , p -> d_sol_mat , p -> d_b_mat );
213
+ cudssStatus_t status =
214
+ cudssExecute (p -> handle , CUDSS_PHASE_SOLVE , p -> solver_config ,
215
+ p -> solver_data , p -> d_kkt_mat , p -> d_sol_mat , p -> d_b_mat );
211
216
212
- if (status != CUDSS_STATUS_SUCCESS )
213
- {
217
+ if (status != CUDSS_STATUS_SUCCESS ) {
214
218
scs_printf ("scs_solve_lin_sys: Error during solve: %d\n" , (int )status );
215
219
return status ;
216
220
}
217
221
218
222
/* Copy solution back to host */
219
223
custatus = cudaMemcpy (b , p -> d_sol , p -> n_plus_m * sizeof (scs_float ),
220
224
cudaMemcpyDeviceToHost );
221
- if (status != cudaSuccess )
222
- {
223
- scs_printf ( "scs_solve_lin_sys: Error copying d_sol to host: %d\n" , (int )status );
225
+ if (status != cudaSuccess ) {
226
+ scs_printf ( "scs_solve_lin_sys: Error copying d_sol to host: %d\n" ,
227
+ (int )status );
224
228
return status ;
225
229
}
226
230
227
231
return 0 ; /* Success */
228
232
}
229
233
230
234
/* Update the KKT matrix when R changes */
231
- void scs_update_lin_sys_diag_r (ScsLinSysWork * p , const scs_float * diag_r )
232
- {
235
+ void scs_update_lin_sys_diag_r (ScsLinSysWork * p , const scs_float * diag_r ) {
233
236
scs_int i ;
234
237
235
238
/* Update KKT matrix on CPU */
236
- for (i = 0 ; i < p -> n ; ++ i )
237
- {
239
+ for (i = 0 ; i < p -> n ; ++ i ) {
238
240
/* top left is R_x + P */
239
241
p -> kkt -> x [p -> diag_r_idxs [i ]] = p -> diag_p [i ] + diag_r [i ];
240
242
}
241
- for (i = p -> n ; i < p -> n + p -> m ; ++ i )
242
- {
243
+ for (i = p -> n ; i < p -> n + p -> m ; ++ i ) {
243
244
/* bottom right is -R_y */
244
245
p -> kkt -> x [p -> diag_r_idxs [i ]] = - diag_r [i ];
245
246
}
@@ -248,28 +249,31 @@ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r)
248
249
cudaError_t custatus = cudaMemcpy (p -> d_kkt_val , p -> kkt -> x ,
249
250
p -> kkt -> p [p -> n_plus_m ] * sizeof (scs_float ),
250
251
cudaMemcpyHostToDevice );
251
- if (custatus != cudaSuccess )
252
- {
253
- scs_printf ("scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n" , (int )custatus );
252
+ if (custatus != cudaSuccess ) {
253
+ scs_printf (
254
+ "scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n" ,
255
+ (int )custatus );
254
256
return ;
255
257
}
256
258
257
259
/* Update the matrix values in cuDSS */
258
260
cudssStatus_t status ;
259
- status = cudssMatrixSetCsrPointers (p -> d_kkt_mat , p -> d_kkt_row_ptr , NULL , p -> d_kkt_col_ind ,
260
- p -> d_kkt_val );
261
- if (status != CUDSS_STATUS_SUCCESS )
262
- {
263
- scs_printf ("scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n" , (int )status );
261
+ status = cudssMatrixSetCsrPointers (p -> d_kkt_mat , p -> d_kkt_row_ptr , NULL ,
262
+ p -> d_kkt_col_ind , p -> d_kkt_val );
263
+ if (status != CUDSS_STATUS_SUCCESS ) {
264
+ scs_printf (
265
+ "scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n" ,
266
+ (int )status );
264
267
return ;
265
268
}
266
269
267
270
/* Perform Refactorization with the updated matrix */
268
- status = cudssExecute (p -> handle , CUDSS_PHASE_REFACTORIZATION , p -> solver_config , p -> solver_data ,
269
- p -> d_kkt_mat , p -> d_sol_mat , p -> d_b_mat );
270
- if (status != CUDSS_STATUS_SUCCESS )
271
- {
272
- scs_printf ("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n" , (int )status );
271
+ status =
272
+ cudssExecute (p -> handle , CUDSS_PHASE_REFACTORIZATION , p -> solver_config ,
273
+ p -> solver_data , p -> d_kkt_mat , p -> d_sol_mat , p -> d_b_mat );
274
+ if (status != CUDSS_STATUS_SUCCESS ) {
275
+ scs_printf ("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n" ,
276
+ (int )status );
273
277
return ;
274
278
}
275
279
}
0 commit comments