Skip to content

fix incorrect type for QDLDL_bool #319

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions linsys/cpu/direct/private.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ static scs_int ldl_prepare(ScsLinSysWork *p) {
L->i = (scs_int *)scs_calloc(nzmax, sizeof(scs_int));
p->Dinv = (scs_float *)scs_calloc(n, sizeof(scs_float));
p->D = (scs_float *)scs_calloc(n, sizeof(scs_float));
p->bwork = (scs_int *)scs_calloc(n, sizeof(scs_int));
p->bwork = (QDLDL_bool *)scs_calloc(n, sizeof(QDLDL_bool));
p->fwork = (scs_float *)scs_calloc(n, sizeof(scs_float));
return nzmax;
}
Expand Down Expand Up @@ -143,7 +143,7 @@ static ScsMatrix *cs_symperm(const ScsMatrix *A, const scs_int *pinv,
i = Ai[p];
if (i > j) {
continue;
} /* skip lower triangular part of A */
} /* skip lower triangular part of A */
i2 = pinv ? pinv[i] : i; /* row i of A is row i2 of C */
w[MAX(i2, j2)]++; /* column count of C */
}
Expand All @@ -155,7 +155,7 @@ static ScsMatrix *cs_symperm(const ScsMatrix *A, const scs_int *pinv,
i = Ai[p];
if (i > j) {
continue;
} /* skip lower triangular part of A*/
} /* skip lower triangular part of A*/
i2 = pinv ? pinv[i] : i; /* row i of A is row i2 of C */
Ci[q = w[MAX(i2, j2)]++] = MIN(i2, j2);
if (Cx) {
Expand Down
3 changes: 2 additions & 1 deletion linsys/cpu/direct/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ struct SCS_LIN_SYS_WORK {
scs_int factorizations;
/* ldl factorization workspace */
scs_float *D, *fwork;
scs_int *etree, *iwork, *Lnz, *bwork;
scs_int *etree, *iwork, *Lnz;
QDLDL_bool *bwork;
scs_float *diag_p;
};

Expand Down
2 changes: 1 addition & 1 deletion linsys/csparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
ScsMatrix *A = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
if (!A) {
return SCS_NULL;
} /* out of memory */
} /* out of memory */
A->m = m; /* define dimensions and nzmax */
A->n = n;
A->p = (scs_int *)scs_calloc((triplet ? nzmax : n + 1), sizeof(scs_int));
Expand Down
180 changes: 92 additions & 88 deletions linsys/cudss/direct/private.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,35 @@
#include "linsys.h"

/* In case of error abort freeing p */
#define CUDSS_CHECK_ABORT(call, p, fname) \
do \
{ \
cudssStatus_t status = call; \
if (status != CUDSS_STATUS_SUCCESS) \
{ \
scs_printf("CUDSS call " #fname " returned status = %d\n", status); \
scs_free_lin_sys_work(p); \
return SCS_NULL; \
} \
#define CUDSS_CHECK_ABORT(call, p, fname) \
do { \
cudssStatus_t status = call; \
if (status != CUDSS_STATUS_SUCCESS) { \
scs_printf("CUDSS call " #fname " returned status = %d\n", status); \
scs_free_lin_sys_work(p); \
return SCS_NULL; \
} \
} while (0);

/* In case of error abort freeing p */
#define CUDA_CHECK_ABORT(call, p, fname) \
do \
{ \
cudaError_t status = call; \
if (status != cudaSuccess) \
{ \
printf("CUDA call " #fname " returned status = %d\n", status); \
scs_free_lin_sys_work(p); \
return SCS_NULL; \
} \
#define CUDA_CHECK_ABORT(call, p, fname) \
do { \
cudaError_t status = call; \
if (status != cudaSuccess) { \
printf("CUDA call " #fname " returned status = %d\n", status); \
scs_free_lin_sys_work(p); \
return SCS_NULL; \
} \
} while (0);

/* Return the linear system method name */
const char *scs_get_lin_sys_method()
{
const char *scs_get_lin_sys_method() {
return "sparse-direct-cuDSS";
}

/* Free allocated resources for the linear system solver */
void scs_free_lin_sys_work(ScsLinSysWork *p)
{
if (p)
{
void scs_free_lin_sys_work(ScsLinSysWork *p) {
if (p) {
/* Free GPU resources */
if (p->d_kkt_val)
cudaFree(p->d_kkt_val);
Expand Down Expand Up @@ -81,8 +74,7 @@ void scs_free_lin_sys_work(ScsLinSysWork *p)

/* Initialize the linear system solver workspace */
ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
const scs_float *diag_r)
{
const scs_float *diag_r) {
ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
if (!p)
return SCS_NULL;
Expand All @@ -94,31 +86,27 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,

/* Allocate CPU memory */
p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
if (!p->sol)
{
if (!p->sol) {
scs_free_lin_sys_work(p);
return SCS_NULL;
}

p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
if (!p->diag_r_idxs)
{
if (!p->diag_r_idxs) {
scs_free_lin_sys_work(p);
return SCS_NULL;
}

p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
if (!p->diag_p)
{
if (!p->diag_p) {
scs_free_lin_sys_work(p);
return SCS_NULL;
}

/* Form KKT matrix as upper-triangular, CSC */
/* Because of symmetry it is equivalent to lower-triangular, CSR */
p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 1);
if (!p->kkt)
{
if (!p->kkt) {
scs_printf("Error in forming KKT matrix");
scs_free_lin_sys_work(p);
return SCS_NULL;
Expand All @@ -131,115 +119,128 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
CUDSS_CHECK_ABORT(cudssCreate(&p->handle), p, "cudssCreate");
/* Creating cuDSS solver configuration and data objects */

CUDSS_CHECK_ABORT(cudssConfigCreate(&p->solver_config), p, "cudssConfigCreate");
CUDSS_CHECK_ABORT(cudssDataCreate(p->handle, &p->solver_data), p, "cudssDataCreate");
CUDSS_CHECK_ABORT(cudssConfigCreate(&p->solver_config), p,
"cudssConfigCreate");
CUDSS_CHECK_ABORT(cudssDataCreate(p->handle, &p->solver_data), p,
"cudssDataCreate");

/* Allocate device memory for KKT matrix */
scs_int nnz = p->kkt->p[p->n_plus_m];

CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_val, nnz * sizeof(scs_float)), p, "cudaMalloc: kkt_val");
CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_row_ptr, (p->n_plus_m + 1) * sizeof(scs_int)), p, "cudaMalloc: kkt_row_ptr");
CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_col_ind, nnz * sizeof(scs_int)), p, "cudaMalloc: kkt_col_ind");
CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_val, nnz * sizeof(scs_float)),
p, "cudaMalloc: kkt_val");
CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_row_ptr,
(p->n_plus_m + 1) * sizeof(scs_int)),
p, "cudaMalloc: kkt_row_ptr");
CUDA_CHECK_ABORT(
cudaMalloc((void **)&p->d_kkt_col_ind, nnz * sizeof(scs_int)), p,
"cudaMalloc: kkt_col_ind");

/* Copy KKT matrix to device */
/* Note: we treat column pointers (p->kkt->p) as row pointers on the device */
CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_val, p->kkt->x, nnz * sizeof(scs_float),
cudaMemcpyHostToDevice),
p, "cudaMemcpy: kkt_val");
CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_row_ptr, p->kkt->p, (p->kkt->n + 1) * sizeof(scs_int),
CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_row_ptr, p->kkt->p,
(p->kkt->n + 1) * sizeof(scs_int),
cudaMemcpyHostToDevice),
p, "cudaMemcpy: kkt_row_ptr");
CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_col_ind, p->kkt->i, nnz * sizeof(scs_int),
cudaMemcpyHostToDevice),
CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_col_ind, p->kkt->i,
nnz * sizeof(scs_int), cudaMemcpyHostToDevice),
p, "cudaMemcpy: kkt_col_ind");

/* Create kkt matrix descriptor */
/* We pass the kkt matrix as symmetric, lower triangular */
cudssMatrixType_t mtype = CUDSS_MTYPE_SYMMETRIC;
cudssMatrixViewType_t mview = CUDSS_MVIEW_LOWER;
cudssIndexBase_t base = CUDSS_BASE_ZERO;
CUDSS_CHECK_ABORT(cudssMatrixCreateCsr(&p->d_kkt_mat, p->kkt->m, p->kkt->n, nnz, p->d_kkt_row_ptr,
NULL, p->d_kkt_col_ind, p->d_kkt_val, SCS_CUDA_INDEX, SCS_CUDA_FLOAT,
mtype, mview, base),
CUDSS_CHECK_ABORT(cudssMatrixCreateCsr(
&p->d_kkt_mat, p->kkt->m, p->kkt->n, nnz,
p->d_kkt_row_ptr, NULL, p->d_kkt_col_ind, p->d_kkt_val,
SCS_CUDA_INDEX, SCS_CUDA_FLOAT, mtype, mview, base),
p, "cudssMatrixCreateCsr");

/* Allocate device memory for vectors */
CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_b, p->n_plus_m * sizeof(scs_float)), p, "cudaMalloc: b");
CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_sol, p->n_plus_m * sizeof(scs_float)), p, "cudaMalloc: sol");
CUDA_CHECK_ABORT(
cudaMalloc((void **)&p->d_b, p->n_plus_m * sizeof(scs_float)), p,
"cudaMalloc: b");
CUDA_CHECK_ABORT(
cudaMalloc((void **)&p->d_sol, p->n_plus_m * sizeof(scs_float)), p,
"cudaMalloc: sol");

/* Create RHS and solution matrix descriptors */
scs_int nrhs = 1;
CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_b_mat, p->n_plus_m, nrhs, p->n_plus_m, p->d_b,
SCS_CUDA_FLOAT, CUDSS_LAYOUT_COL_MAJOR),
CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_b_mat, p->n_plus_m, nrhs,
p->n_plus_m, p->d_b, SCS_CUDA_FLOAT,
CUDSS_LAYOUT_COL_MAJOR),
p, "cudssMatrixCreateDn: b");
CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_sol_mat, p->n_plus_m, nrhs, p->n_plus_m, p->d_sol,
SCS_CUDA_FLOAT, CUDSS_LAYOUT_COL_MAJOR),
CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_sol_mat, p->n_plus_m, nrhs,
p->n_plus_m, p->d_sol, SCS_CUDA_FLOAT,
CUDSS_LAYOUT_COL_MAJOR),
p, "cudssMatrixCreateDn: sol");

/* Symbolic factorization */
CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_ANALYSIS, p->solver_config, p->solver_data,
p->d_kkt_mat, p->d_sol_mat, p->d_b_mat),
CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_ANALYSIS,
p->solver_config, p->solver_data, p->d_kkt_mat,
p->d_sol_mat, p->d_b_mat),
p, "cudssExecute: analysis");

/* Numerical Factorization */
CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_FACTORIZATION, p->solver_config, p->solver_data,
p->d_kkt_mat, p->d_sol_mat, p->d_b_mat),
CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_FACTORIZATION,
p->solver_config, p->solver_data, p->d_kkt_mat,
p->d_sol_mat, p->d_b_mat),
p, "cudssExecute: factorization");

return p;
}

/* Solve the linear system for a given RHS b */
scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
scs_float tol)
{
scs_float tol) {
/* Copy right-hand side to device */
cudaError_t custatus = cudaMemcpy(p->d_b, b, p->n_plus_m * sizeof(scs_float),
cudaMemcpyHostToDevice);
if (custatus != cudaSuccess)
{
scs_printf("scs_solve_lin_sys: Error copying `b` side to device: %d\n", (int)custatus);
if (custatus != cudaSuccess) {
scs_printf("scs_solve_lin_sys: Error copying `b` side to device: %d\n",
(int)custatus);
return custatus;
}

// is this really needed?
cudssMatrixSetValues(p->d_b_mat, p->d_b);

/* Solve the system */
cudssStatus_t status = cudssExecute(p->handle, CUDSS_PHASE_SOLVE, p->solver_config, p->solver_data,
p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);
cudssStatus_t status =
cudssExecute(p->handle, CUDSS_PHASE_SOLVE, p->solver_config,
p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);

if (status != CUDSS_STATUS_SUCCESS)
{
if (status != CUDSS_STATUS_SUCCESS) {
scs_printf("scs_solve_lin_sys: Error during solve: %d\n", (int)status);
return status;
}

/* Copy solution back to host */
custatus = cudaMemcpy(b, p->d_sol, p->n_plus_m * sizeof(scs_float),
cudaMemcpyDeviceToHost);
if (status != cudaSuccess)
{
scs_printf("scs_solve_lin_sys: Error copying d_sol to host: %d\n", (int)status);
if (status != cudaSuccess) {
scs_printf("scs_solve_lin_sys: Error copying d_sol to host: %d\n",
(int)status);
return status;
}

return 0; /* Success */
}

/* Update the KKT matrix when R changes */
void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r)
{
void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
scs_int i;

/* Update KKT matrix on CPU */
for (i = 0; i < p->n; ++i)
{
for (i = 0; i < p->n; ++i) {
/* top left is R_x + P */
p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
}
for (i = p->n; i < p->n + p->m; ++i)
{
for (i = p->n; i < p->n + p->m; ++i) {
/* bottom right is -R_y */
p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
}
Expand All @@ -248,28 +249,31 @@ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r)
cudaError_t custatus = cudaMemcpy(p->d_kkt_val, p->kkt->x,
p->kkt->p[p->n_plus_m] * sizeof(scs_float),
cudaMemcpyHostToDevice);
if (custatus != cudaSuccess)
{
scs_printf("scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n", (int)custatus);
if (custatus != cudaSuccess) {
scs_printf(
"scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n",
(int)custatus);
return;
}

/* Update the matrix values in cuDSS */
cudssStatus_t status;
status = cudssMatrixSetCsrPointers(p->d_kkt_mat, p->d_kkt_row_ptr, NULL, p->d_kkt_col_ind,
p->d_kkt_val);
if (status != CUDSS_STATUS_SUCCESS)
{
scs_printf("scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n", (int)status);
status = cudssMatrixSetCsrPointers(p->d_kkt_mat, p->d_kkt_row_ptr, NULL,
p->d_kkt_col_ind, p->d_kkt_val);
if (status != CUDSS_STATUS_SUCCESS) {
scs_printf(
"scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n",
(int)status);
return;
}

/* Perform Refactorization with the updated matrix */
status = cudssExecute(p->handle, CUDSS_PHASE_REFACTORIZATION, p->solver_config, p->solver_data,
p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);
if (status != CUDSS_STATUS_SUCCESS)
{
scs_printf("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n", (int)status);
status =
cudssExecute(p->handle, CUDSS_PHASE_REFACTORIZATION, p->solver_config,
p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);
if (status != CUDSS_STATUS_SUCCESS) {
scs_printf("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n",
(int)status);
return;
}
}
Loading
Loading