From 19f003bef8d2bdac05400ca93203d3781ab26bed Mon Sep 17 00:00:00 2001 From: Brendan O'Donoghue Date: Fri, 8 Aug 2025 20:00:34 +0100 Subject: [PATCH 1/3] fix incorrect type for QDLDL_bool --- linsys/cpu/direct/private.c | 2 +- linsys/cpu/direct/private.h | 3 ++- linsys/external/qdldl/qdldl_types.h | 2 +- scs.mk | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/linsys/cpu/direct/private.c b/linsys/cpu/direct/private.c index 2b33f2f3..11465720 100644 --- a/linsys/cpu/direct/private.c +++ b/linsys/cpu/direct/private.c @@ -51,7 +51,7 @@ static scs_int ldl_prepare(ScsLinSysWork *p) { L->i = (scs_int *)scs_calloc(nzmax, sizeof(scs_int)); p->Dinv = (scs_float *)scs_calloc(n, sizeof(scs_float)); p->D = (scs_float *)scs_calloc(n, sizeof(scs_float)); - p->bwork = (scs_int *)scs_calloc(n, sizeof(scs_int)); + p->bwork = (QDLDL_bool *)scs_calloc(n, sizeof(QDLDL_bool)); p->fwork = (scs_float *)scs_calloc(n, sizeof(scs_float)); return nzmax; } diff --git a/linsys/cpu/direct/private.h b/linsys/cpu/direct/private.h index d7b3ff47..14837e56 100644 --- a/linsys/cpu/direct/private.h +++ b/linsys/cpu/direct/private.h @@ -22,7 +22,8 @@ struct SCS_LIN_SYS_WORK { scs_int factorizations; /* ldl factorization workspace */ scs_float *D, *fwork; - scs_int *etree, *iwork, *Lnz, *bwork; + scs_int *etree, *iwork, *Lnz; + QDLDL_bool *bwork; scs_float *diag_p; }; diff --git a/linsys/external/qdldl/qdldl_types.h b/linsys/external/qdldl/qdldl_types.h index a0ccc1e2..dae9ea2e 100644 --- a/linsys/external/qdldl/qdldl_types.h +++ b/linsys/external/qdldl/qdldl_types.h @@ -12,7 +12,7 @@ extern "C" { #define QDLDL_int scs_int #define QDLDL_float scs_float -#define QDLDL_bool scs_int +#define QDLDL_bool unsigned char /* Maximum value of the signed type QDLDL_int */ #ifdef DLONG diff --git a/scs.mk b/scs.mk index c7f44c16..0b75db13 100644 --- a/scs.mk +++ b/scs.mk @@ -64,7 +64,7 @@ CUDSS_LDFLAGS = $(CULDFLAGS) -L$(CUDSS_PATH)/lib -lcudss # Add on default CFLAGS OPT = -O3 INCLUDE = -I. -Iinclude -Ilinsys -override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes $(INCLUDE) $(OPT) +override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes $(INCLUDE) $(OPT) -Werror=incompatible-pointer-types ifneq ($(ISWINDOWS), 1) override CFLAGS += -fPIC endif From 85454b8980d1cffe2085c2cc121945545c3b99ef Mon Sep 17 00:00:00 2001 From: Brendan O'Donoghue Date: Sun, 10 Aug 2025 15:05:23 +0100 Subject: [PATCH 2/3] clang-format -style="{BasedOnStyle: llvm, IndentWidth: 2, AllowShortFunctionsOnASingleLine: None, KeepEmptyLinesAtTheStartOfBlocks: false}" -i *.{c,h} --- linsys/cpu/direct/private.c | 4 +- linsys/csparse.c | 2 +- linsys/cudss/direct/private.c | 180 +++++++++--------- linsys/cudss/direct/private.h | 70 ++++--- linsys/gpu/indirect/private.c | 35 ++-- src/aa.c | 20 +- src/cones.c | 14 +- .../logdeterminant/log_cone_IPM.c | 71 +++---- .../logdeterminant/logdet_cone.c | 18 +- src/spectral_cones/nuclear/ell1_cone.c | 3 +- src/spectral_cones/nuclear/nuclear_cone.c | 11 +- .../sum-largest/sum_largest_eval_cone.c | 5 +- test/rng.h | 8 +- test/run_tests.c | 2 +- test/spectral_cones_problems/exp_design.h | 3 +- .../graph_partitioning.h | 3 +- test/spectral_cones_problems/robust_pca.h | 3 +- .../several_logdet_cones.h | 16 +- .../several_nuc_cone.h | 3 +- .../several_sum_largest.h | 3 +- 20 files changed, 214 insertions(+), 260 deletions(-) diff --git a/linsys/cpu/direct/private.c b/linsys/cpu/direct/private.c index 11465720..737e60d2 100644 --- a/linsys/cpu/direct/private.c +++ b/linsys/cpu/direct/private.c @@ -143,7 +143,7 @@ static ScsMatrix *cs_symperm(const ScsMatrix *A, const scs_int *pinv, i = Ai[p]; if (i > j) { continue; - } /* skip lower triangular part of A */ + } /* skip lower triangular part of A */ i2 = pinv ? pinv[i] : i; /* row i of A is row i2 of C */ w[MAX(i2, j2)]++; /* column count of C */ } @@ -155,7 +155,7 @@ static ScsMatrix *cs_symperm(const ScsMatrix *A, const scs_int *pinv, i = Ai[p]; if (i > j) { continue; - } /* skip lower triangular part of A*/ + } /* skip lower triangular part of A*/ i2 = pinv ? pinv[i] : i; /* row i of A is row i2 of C */ Ci[q = w[MAX(i2, j2)]++] = MIN(i2, j2); if (Cx) { diff --git a/linsys/csparse.c b/linsys/csparse.c index e3a9173e..14b916e1 100644 --- a/linsys/csparse.c +++ b/linsys/csparse.c @@ -7,7 +7,7 @@ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values, ScsMatrix *A = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix)); if (!A) { return SCS_NULL; - } /* out of memory */ + } /* out of memory */ A->m = m; /* define dimensions and nzmax */ A->n = n; A->p = (scs_int *)scs_calloc((triplet ? nzmax : n + 1), sizeof(scs_int)); diff --git a/linsys/cudss/direct/private.c b/linsys/cudss/direct/private.c index b6acf0fd..38315d60 100644 --- a/linsys/cudss/direct/private.c +++ b/linsys/cudss/direct/private.c @@ -2,42 +2,35 @@ #include "linsys.h" /* In case of error abort freeing p */ -#define CUDSS_CHECK_ABORT(call, p, fname) \ - do \ - { \ - cudssStatus_t status = call; \ - if (status != CUDSS_STATUS_SUCCESS) \ - { \ - scs_printf("CUDSS call " #fname " returned status = %d\n", status); \ - scs_free_lin_sys_work(p); \ - return SCS_NULL; \ - } \ +#define CUDSS_CHECK_ABORT(call, p, fname) \ + do { \ + cudssStatus_t status = call; \ + if (status != CUDSS_STATUS_SUCCESS) { \ + scs_printf("CUDSS call " #fname " returned status = %d\n", status); \ + scs_free_lin_sys_work(p); \ + return SCS_NULL; \ + } \ } while (0); /* In case of error abort freeing p */ -#define CUDA_CHECK_ABORT(call, p, fname) \ - do \ - { \ - cudaError_t status = call; \ - if (status != cudaSuccess) \ - { \ - printf("CUDA call " #fname " returned status = %d\n", status); \ - scs_free_lin_sys_work(p); \ - return SCS_NULL; \ - } \ +#define CUDA_CHECK_ABORT(call, p, fname) \ + do { \ + cudaError_t status = call; \ + if (status != cudaSuccess) { \ + printf("CUDA call " #fname " returned status = %d\n", status); \ + scs_free_lin_sys_work(p); \ + return SCS_NULL; \ + } \ } while (0); /* Return the linear system method name */ -const char *scs_get_lin_sys_method() -{ +const char *scs_get_lin_sys_method() { return "sparse-direct-cuDSS"; } /* Free allocated resources for the linear system solver */ -void scs_free_lin_sys_work(ScsLinSysWork *p) -{ - if (p) - { +void scs_free_lin_sys_work(ScsLinSysWork *p) { + if (p) { /* Free GPU resources */ if (p->d_kkt_val) cudaFree(p->d_kkt_val); @@ -81,8 +74,7 @@ void scs_free_lin_sys_work(ScsLinSysWork *p) /* Initialize the linear system solver workspace */ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P, - const scs_float *diag_r) -{ + const scs_float *diag_r) { ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork)); if (!p) return SCS_NULL; @@ -94,22 +86,19 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P, /* Allocate CPU memory */ p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m); - if (!p->sol) - { + if (!p->sol) { scs_free_lin_sys_work(p); return SCS_NULL; } p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int)); - if (!p->diag_r_idxs) - { + if (!p->diag_r_idxs) { scs_free_lin_sys_work(p); return SCS_NULL; } p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float)); - if (!p->diag_p) - { + if (!p->diag_p) { scs_free_lin_sys_work(p); return SCS_NULL; } @@ -117,8 +106,7 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P, /* Form KKT matrix as upper-triangular, CSC */ /* Because of symmetry it is equivalent to lower-triangular, CSR */ p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 1); - if (!p->kkt) - { + if (!p->kkt) { scs_printf("Error in forming KKT matrix"); scs_free_lin_sys_work(p); return SCS_NULL; @@ -131,26 +119,34 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P, CUDSS_CHECK_ABORT(cudssCreate(&p->handle), p, "cudssCreate"); /* Creating cuDSS solver configuration and data objects */ - CUDSS_CHECK_ABORT(cudssConfigCreate(&p->solver_config), p, "cudssConfigCreate"); - CUDSS_CHECK_ABORT(cudssDataCreate(p->handle, &p->solver_data), p, "cudssDataCreate"); + CUDSS_CHECK_ABORT(cudssConfigCreate(&p->solver_config), p, + "cudssConfigCreate"); + CUDSS_CHECK_ABORT(cudssDataCreate(p->handle, &p->solver_data), p, + "cudssDataCreate"); /* Allocate device memory for KKT matrix */ scs_int nnz = p->kkt->p[p->n_plus_m]; - CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_val, nnz * sizeof(scs_float)), p, "cudaMalloc: kkt_val"); - CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_row_ptr, (p->n_plus_m + 1) * sizeof(scs_int)), p, "cudaMalloc: kkt_row_ptr"); - CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_col_ind, nnz * sizeof(scs_int)), p, "cudaMalloc: kkt_col_ind"); + CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_val, nnz * sizeof(scs_float)), + p, "cudaMalloc: kkt_val"); + CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_row_ptr, + (p->n_plus_m + 1) * sizeof(scs_int)), + p, "cudaMalloc: kkt_row_ptr"); + CUDA_CHECK_ABORT( + cudaMalloc((void **)&p->d_kkt_col_ind, nnz * sizeof(scs_int)), p, + "cudaMalloc: kkt_col_ind"); /* Copy KKT matrix to device */ /* Note: we treat column pointers (p->kkt->p) as row pointers on the device */ CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_val, p->kkt->x, nnz * sizeof(scs_float), cudaMemcpyHostToDevice), p, "cudaMemcpy: kkt_val"); - CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_row_ptr, p->kkt->p, (p->kkt->n + 1) * sizeof(scs_int), + CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_row_ptr, p->kkt->p, + (p->kkt->n + 1) * sizeof(scs_int), cudaMemcpyHostToDevice), p, "cudaMemcpy: kkt_row_ptr"); - CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_col_ind, p->kkt->i, nnz * sizeof(scs_int), - cudaMemcpyHostToDevice), + CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_col_ind, p->kkt->i, + nnz * sizeof(scs_int), cudaMemcpyHostToDevice), p, "cudaMemcpy: kkt_col_ind"); /* Create kkt matrix descriptor */ @@ -158,32 +154,41 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P, cudssMatrixType_t mtype = CUDSS_MTYPE_SYMMETRIC; cudssMatrixViewType_t mview = CUDSS_MVIEW_LOWER; cudssIndexBase_t base = CUDSS_BASE_ZERO; - CUDSS_CHECK_ABORT(cudssMatrixCreateCsr(&p->d_kkt_mat, p->kkt->m, p->kkt->n, nnz, p->d_kkt_row_ptr, - NULL, p->d_kkt_col_ind, p->d_kkt_val, SCS_CUDA_INDEX, SCS_CUDA_FLOAT, - mtype, mview, base), + CUDSS_CHECK_ABORT(cudssMatrixCreateCsr( + &p->d_kkt_mat, p->kkt->m, p->kkt->n, nnz, + p->d_kkt_row_ptr, NULL, p->d_kkt_col_ind, p->d_kkt_val, + SCS_CUDA_INDEX, SCS_CUDA_FLOAT, mtype, mview, base), p, "cudssMatrixCreateCsr"); /* Allocate device memory for vectors */ - CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_b, p->n_plus_m * sizeof(scs_float)), p, "cudaMalloc: b"); - CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_sol, p->n_plus_m * sizeof(scs_float)), p, "cudaMalloc: sol"); + CUDA_CHECK_ABORT( + cudaMalloc((void **)&p->d_b, p->n_plus_m * sizeof(scs_float)), p, + "cudaMalloc: b"); + CUDA_CHECK_ABORT( + cudaMalloc((void **)&p->d_sol, p->n_plus_m * sizeof(scs_float)), p, + "cudaMalloc: sol"); /* Create RHS and solution matrix descriptors */ scs_int nrhs = 1; - CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_b_mat, p->n_plus_m, nrhs, p->n_plus_m, p->d_b, - SCS_CUDA_FLOAT, CUDSS_LAYOUT_COL_MAJOR), + CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_b_mat, p->n_plus_m, nrhs, + p->n_plus_m, p->d_b, SCS_CUDA_FLOAT, + CUDSS_LAYOUT_COL_MAJOR), p, "cudssMatrixCreateDn: b"); - CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_sol_mat, p->n_plus_m, nrhs, p->n_plus_m, p->d_sol, - SCS_CUDA_FLOAT, CUDSS_LAYOUT_COL_MAJOR), + CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_sol_mat, p->n_plus_m, nrhs, + p->n_plus_m, p->d_sol, SCS_CUDA_FLOAT, + CUDSS_LAYOUT_COL_MAJOR), p, "cudssMatrixCreateDn: sol"); /* Symbolic factorization */ - CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_ANALYSIS, p->solver_config, p->solver_data, - p->d_kkt_mat, p->d_sol_mat, p->d_b_mat), + CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_ANALYSIS, + p->solver_config, p->solver_data, p->d_kkt_mat, + p->d_sol_mat, p->d_b_mat), p, "cudssExecute: analysis"); /* Numerical Factorization */ - CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_FACTORIZATION, p->solver_config, p->solver_data, - p->d_kkt_mat, p->d_sol_mat, p->d_b_mat), + CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_FACTORIZATION, + p->solver_config, p->solver_data, p->d_kkt_mat, + p->d_sol_mat, p->d_b_mat), p, "cudssExecute: factorization"); return p; @@ -191,14 +196,13 @@ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P, /* Solve the linear system for a given RHS b */ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws, - scs_float tol) -{ + scs_float tol) { /* Copy right-hand side to device */ cudaError_t custatus = cudaMemcpy(p->d_b, b, p->n_plus_m * sizeof(scs_float), cudaMemcpyHostToDevice); - if (custatus != cudaSuccess) - { - scs_printf("scs_solve_lin_sys: Error copying `b` side to device: %d\n", (int)custatus); + if (custatus != cudaSuccess) { + scs_printf("scs_solve_lin_sys: Error copying `b` side to device: %d\n", + (int)custatus); return custatus; } @@ -206,11 +210,11 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws, cudssMatrixSetValues(p->d_b_mat, p->d_b); /* Solve the system */ - cudssStatus_t status = cudssExecute(p->handle, CUDSS_PHASE_SOLVE, p->solver_config, p->solver_data, - p->d_kkt_mat, p->d_sol_mat, p->d_b_mat); + cudssStatus_t status = + cudssExecute(p->handle, CUDSS_PHASE_SOLVE, p->solver_config, + p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat); - if (status != CUDSS_STATUS_SUCCESS) - { + if (status != CUDSS_STATUS_SUCCESS) { scs_printf("scs_solve_lin_sys: Error during solve: %d\n", (int)status); return status; } @@ -218,9 +222,9 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws, /* Copy solution back to host */ custatus = cudaMemcpy(b, p->d_sol, p->n_plus_m * sizeof(scs_float), cudaMemcpyDeviceToHost); - if (status != cudaSuccess) - { - scs_printf("scs_solve_lin_sys: Error copying d_sol to host: %d\n", (int)status); + if (status != cudaSuccess) { + scs_printf("scs_solve_lin_sys: Error copying d_sol to host: %d\n", + (int)status); return status; } @@ -228,18 +232,15 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws, } /* Update the KKT matrix when R changes */ -void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) -{ +void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) { scs_int i; /* Update KKT matrix on CPU */ - for (i = 0; i < p->n; ++i) - { + for (i = 0; i < p->n; ++i) { /* top left is R_x + P */ p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i]; } - for (i = p->n; i < p->n + p->m; ++i) - { + for (i = p->n; i < p->n + p->m; ++i) { /* bottom right is -R_y */ p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i]; } @@ -248,28 +249,31 @@ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) cudaError_t custatus = cudaMemcpy(p->d_kkt_val, p->kkt->x, p->kkt->p[p->n_plus_m] * sizeof(scs_float), cudaMemcpyHostToDevice); - if (custatus != cudaSuccess) - { - scs_printf("scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n", (int)custatus); + if (custatus != cudaSuccess) { + scs_printf( + "scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n", + (int)custatus); return; } /* Update the matrix values in cuDSS */ cudssStatus_t status; - status = cudssMatrixSetCsrPointers(p->d_kkt_mat, p->d_kkt_row_ptr, NULL, p->d_kkt_col_ind, - p->d_kkt_val); - if (status != CUDSS_STATUS_SUCCESS) - { - scs_printf("scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n", (int)status); + status = cudssMatrixSetCsrPointers(p->d_kkt_mat, p->d_kkt_row_ptr, NULL, + p->d_kkt_col_ind, p->d_kkt_val); + if (status != CUDSS_STATUS_SUCCESS) { + scs_printf( + "scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n", + (int)status); return; } /* Perform Refactorization with the updated matrix */ - status = cudssExecute(p->handle, CUDSS_PHASE_REFACTORIZATION, p->solver_config, p->solver_data, - p->d_kkt_mat, p->d_sol_mat, p->d_b_mat); - if (status != CUDSS_STATUS_SUCCESS) - { - scs_printf("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n", (int)status); + status = + cudssExecute(p->handle, CUDSS_PHASE_REFACTORIZATION, p->solver_config, + p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat); + if (status != CUDSS_STATUS_SUCCESS) { + scs_printf("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n", + (int)status); return; } } diff --git a/linsys/cudss/direct/private.h b/linsys/cudss/direct/private.h index 68610b02..c087c2d0 100644 --- a/linsys/cudss/direct/private.h +++ b/linsys/cudss/direct/private.h @@ -2,8 +2,7 @@ #define PRIV_H_GUARD #ifdef __cplusplus -extern "C" -{ +extern "C" { #endif #ifndef SFLOAT @@ -23,40 +22,39 @@ extern "C" #include #include - struct SCS_LIN_SYS_WORK - { - /* General problem dimensions */ - scs_int n; /* number of QP variables */ - scs_int m; /* number of QP constraints */ - scs_int n_plus_m; /* dimension of the linear system */ - - /* CPU matrices and vectors */ - ScsMatrix *kkt; /* KKT matrix in CSR format */ - scs_float *sol; /* solution to the KKT system */ - - /* cuDSS handle and descriptors */ - cudssHandle_t handle; /* cuDSS library handle */ - cudssMatrix_t d_kkt_mat; /* cuDSS matrix descriptors */ - cudssMatrix_t d_b_mat; - cudssMatrix_t d_sol_mat; - - /* Device memory for KKT matrix */ - scs_float *d_kkt_val; /* device copy of KKT values */ - scs_int *d_kkt_row_ptr; /* device copy of KKT row pointers */ - scs_int *d_kkt_col_ind; /* device copy of KKT column indices */ - - /* Device memory for vectors */ - scs_float *d_b; /* device copy of right-hand side */ - scs_float *d_sol; /* device copy of solution */ - - /* These are required for matrix updates */ - scs_int *diag_r_idxs; /* indices where R appears in the KKT matrix */ - scs_float *diag_p; /* Diagonal values of P */ - - /* cuDSS configuration */ - cudssConfig_t solver_config; /* cuDSS solver handle */ - cudssData_t solver_data; /* cuDSS data handle */ - }; +struct SCS_LIN_SYS_WORK { + /* General problem dimensions */ + scs_int n; /* number of QP variables */ + scs_int m; /* number of QP constraints */ + scs_int n_plus_m; /* dimension of the linear system */ + + /* CPU matrices and vectors */ + ScsMatrix *kkt; /* KKT matrix in CSR format */ + scs_float *sol; /* solution to the KKT system */ + + /* cuDSS handle and descriptors */ + cudssHandle_t handle; /* cuDSS library handle */ + cudssMatrix_t d_kkt_mat; /* cuDSS matrix descriptors */ + cudssMatrix_t d_b_mat; + cudssMatrix_t d_sol_mat; + + /* Device memory for KKT matrix */ + scs_float *d_kkt_val; /* device copy of KKT values */ + scs_int *d_kkt_row_ptr; /* device copy of KKT row pointers */ + scs_int *d_kkt_col_ind; /* device copy of KKT column indices */ + + /* Device memory for vectors */ + scs_float *d_b; /* device copy of right-hand side */ + scs_float *d_sol; /* device copy of solution */ + + /* These are required for matrix updates */ + scs_int *diag_r_idxs; /* indices where R appears in the KKT matrix */ + scs_float *diag_p; /* Diagonal values of P */ + + /* cuDSS configuration */ + cudssConfig_t solver_config; /* cuDSS solver handle */ + cudssData_t solver_data; /* cuDSS data handle */ +}; #ifdef __cplusplus } diff --git a/linsys/gpu/indirect/private.c b/linsys/gpu/indirect/private.c index 78d71317..56fdc16b 100644 --- a/linsys/gpu/indirect/private.c +++ b/linsys/gpu/indirect/private.c @@ -147,28 +147,24 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) { if (p->Pg) { /* y = R_x * x + P x */ - SCS(accum_by_p_gpu) - (p->Pg, p->dn_vec_n, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size, - &p->buffer); + SCS(accum_by_p_gpu)(p->Pg, p->dn_vec_n, p->dn_vec_n_p, p->cusparse_handle, + &p->buffer_size, &p->buffer); } /* z = Ax */ #if GPU_TRANSPOSE_MAT > 0 - SCS(accum_by_atrans_gpu) - (p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size, - &p->buffer); + SCS(accum_by_atrans_gpu)(p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, + &p->buffer_size, &p->buffer); #else - SCS(accum_by_a_gpu) - (p->Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size, - &p->buffer); + SCS(accum_by_a_gpu)(p->Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, + &p->buffer_size, &p->buffer); #endif /* z = R_y^{-1} A x */ scale_by_diag(p->cublas_handle, p->inv_r_y_gpu, z, p->m); /* y += A'z => y = R_x * x + P x + A' R_y^{-1} Ax */ - SCS(accum_by_atrans_gpu) - (p->Ag, p->dn_vec_m, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size, - &p->buffer); + SCS(accum_by_atrans_gpu)(p->Ag, p->dn_vec_m, p->dn_vec_n_p, + p->cusparse_handle, &p->buffer_size, &p->buffer); } /* P comes in upper triangular, expand to full @@ -488,9 +484,8 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s, cusparseDnVecSetValues(p->dn_vec_m, (void *)tmp_m); /* R * ry */ cusparseDnVecSetValues(p->dn_vec_n, (void *)bg); /* rx */ /* bg[:n] = rx + A' R ry */ - SCS(accum_by_atrans_gpu) - (Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle, &p->buffer_size, - &p->buffer); + SCS(accum_by_atrans_gpu)(Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle, + &p->buffer_size, &p->buffer); /* set max_iters to 10 * n (though in theory n is enough for any tol) */ max_iters = 10 * Ag->n; @@ -506,13 +501,11 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s, /* b[n:] = Ax - ry */ #if GPU_TRANSPOSE_MAT > 0 - SCS(accum_by_atrans_gpu) - (p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size, - &p->buffer); + SCS(accum_by_atrans_gpu)(p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, + &p->buffer_size, &p->buffer); #else - SCS(accum_by_a_gpu) - (Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size, - &p->buffer); + SCS(accum_by_a_gpu)(Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, + &p->buffer_size, &p->buffer); #endif /* bg[n:] = R_y^{-1} bg[n:] = R_y^{-1} (Ax - ry) = y */ diff --git a/src/aa.c b/src/aa.c index a7cb1865..474bf272 100644 --- a/src/aa.c +++ b/src/aa.c @@ -189,9 +189,8 @@ static void set_m(AaWork *a, aa_int len) { blas_int blen = (blas_int)len; aa_float onef = 1.0, zerof = 0.0, r; /* if len < mem this only uses len cols */ - BLAS(gemm) - ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim, - a->Y, &bdim, &zerof, a->M, &blen); + BLAS(gemm)("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, + &bdim, a->Y, &bdim, &zerof, a->M, &blen); if (a->regularization > 0) { r = compute_regularization(a, len); for (i = 0; i < len; ++i) { @@ -287,9 +286,8 @@ static void relax(aa_float *f, AaWork *a, aa_int len) { aa_float onef = 1.0, neg_onef = -1.0; aa_float one_m_relaxation = 1. - a->relaxation; /* x_work = x - S * work */ - BLAS(gemv) - ("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef, - a->x_work, &one); + BLAS(gemv)("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, + &onef, a->x_work, &one); /* f = relaxation * f */ BLAS(scal)(&bdim, &a->relaxation, f, &one); /* f += (1 - relaxation) * x_work */ @@ -306,9 +304,8 @@ static aa_float solve(aa_float *f, AaWork *a, aa_int len) { aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm; /* work = S'g or Y'g */ - BLAS(gemv) - ("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one, - &zerof, a->work, &one); + BLAS(gemv)("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, + &one, &zerof, a->work, &one); /* work = M \ work, where update_accel_params has set M = S'Y or M = Y'Y */ BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info); @@ -335,9 +332,8 @@ static aa_float solve(aa_float *f, AaWork *a, aa_int len) { /* if solve was successful compute new point */ /* first set f -= D * work */ - BLAS(gemv) - ("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f, - &one); + BLAS(gemv)("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, + &onef, f, &one); /* if relaxation is not 1 then need to incorporate */ if (a->relaxation != 1.0) { diff --git a/src/cones.c b/src/cones.c index c39b051c..8be9ac01 100644 --- a/src/cones.c +++ b/src/cones.c @@ -731,9 +731,8 @@ static scs_int set_up_sd_cone_work_space(ScsConeWork *c, const ScsCone *k) { c->e = (scs_float *)scs_calloc(n_max, sizeof(scs_float)); /* workspace query */ - BLAS(syev) - ("Vectors", "Lower", &n_max, c->Xs, &n_max, SCS_NULL, &wkopt, &neg_one, - &info); + BLAS(syev)("Vectors", "Lower", &n_max, c->Xs, &n_max, SCS_NULL, &wkopt, + &neg_one, &info); if (info != 0) { scs_printf("FATAL: syev workspace query failure, info = %li\n", (long)info); @@ -774,9 +773,9 @@ static scs_int set_up_sd_cone_work_space(ScsConeWork *c, const ScsCone *k) { } // workspace query - BLAS(gesvd) - ("S", "A", &m_max_nuc, &n_max_nuc, c->u_nuc, &m_max_nuc, c->s_nuc, c->u_nuc, - &m_max_nuc, c->vt_nuc, &n_max_nuc, &wkopt, &neg_one, &info); + BLAS(gesvd)("S", "A", &m_max_nuc, &n_max_nuc, c->u_nuc, &m_max_nuc, + c->s_nuc, c->u_nuc, &m_max_nuc, c->vt_nuc, &n_max_nuc, &wkopt, + &neg_one, &info); c->lwork_nuc = (blas_int)(wkopt + 1); /* +1 for int casting safety */ c->work_nuc = (scs_float *)scs_calloc(c->lwork_nuc, sizeof(scs_float)); @@ -896,8 +895,7 @@ static scs_int proj_semi_definite_cone(scs_float *X, const scs_int n, } } - SCS(tic) - (&_timer); + SCS(tic)(&_timer); first_idx = -1; /* e is eigvals in ascending order, find first entry > 0 */ diff --git a/src/spectral_cones/logdeterminant/log_cone_IPM.c b/src/spectral_cones/logdeterminant/log_cone_IPM.c index 764ca021..00a1f44f 100644 --- a/src/spectral_cones/logdeterminant/log_cone_IPM.c +++ b/src/spectral_cones/logdeterminant/log_cone_IPM.c @@ -60,8 +60,7 @@ static void f_oracle(const scs_float *u1, scs_float r, scs_float t0, grad_f0[0] = u1[0] - t0; grad_f0[1] = u1[1] - v0; memcpy(grad_f0 + 2, u1 + 2, n * sizeof(*grad_f0)); - SCS(add_scaled_array) - (grad_f0 + 2, x0, n, -1.0); + SCS(add_scaled_array)(grad_f0 + 2, x0, n, -1.0); grad_f0[n + 2] = -1.0; // compute f_u @@ -78,8 +77,7 @@ static void f_oracle(const scs_float *u1, scs_float r, scs_float t0, grad_f1[0] = -1.0; grad_f1[1] = n - sum_log_xv; memcpy(grad_f1 + 2, x_inv, n * sizeof(*x_inv)); - SCS(scale_array) - (grad_f1 + 2, -u1[1], n); + SCS(scale_array)(grad_f1 + 2, -u1[1], n); grad_f1[n + 2] = 0.0; } @@ -227,11 +225,9 @@ static void KKT_solve(const scs_float *z, const scs_float *w, rhs_reduced[u_dim + 2] -= w[2] * (rhs2[2] / lmbda[2]); memcpy(bnew, rhs_reduced, u_dim * sizeof(*rhs_reduced)); scs_float scale = rhs_reduced[n + 3] / w[0]; - SCS(add_scaled_array) - (bnew, g0g1, u_dim, scale); + SCS(add_scaled_array)(bnew, g0g1, u_dim, scale); scale = rhs_reduced[n + 4] / w[1]; - SCS(add_scaled_array) - (bnew, g0g1 + u_dim, u_dim, scale); + SCS(add_scaled_array)(bnew, g0g1 + u_dim, u_dim, scale); bnew[1] -= (rhs_reduced[n + 5] / (w[2] * w[2])); // ------------------------------------------------------------------------- @@ -278,11 +274,9 @@ static void KKT_solve(const scs_float *z, const scs_float *w, // -------------------------------------------------------------------- char trans = 'N'; - BLAS(gemv) - (&trans, &u_dim, &bi_three, &d_minus_one, GinvC, &u_dim, RinvCTGinvRes, - &bi_one, &d_one, GinvRes, &bi_one); - SCS(add_scaled_array) - (du1, GinvRes, u1_dim, 1.0); + BLAS(gemv)(&trans, &u_dim, &bi_three, &d_minus_one, GinvC, &u_dim, + RinvCTGinvRes, &bi_one, &d_one, GinvRes, &bi_one); + SCS(add_scaled_array)(du1, GinvRes, u1_dim, 1.0); *dr += GinvRes[n + 2]; // ----------------------------------- @@ -303,19 +297,15 @@ static void KKT_solve(const scs_float *z, const scs_float *w, scs_float coeff1 = SCS(dot)(g0g1 + u_dim, du1, u1_dim) + g0g1[2 * n + 5] * (*dr); memcpy(CCTdu, g0g1, u_dim * sizeof(*g0g1)); - SCS(scale_array) - (CCTdu, coeff0, u_dim); - SCS(add_scaled_array) - (CCTdu, g0g1 + u_dim, u_dim, coeff1); + SCS(scale_array)(CCTdu, coeff0, u_dim); + SCS(add_scaled_array)(CCTdu, g0g1 + u_dim, u_dim, coeff1); CCTdu[n + 2] += (*dr); memcpy(residual, bnew, u_dim * sizeof(*bnew)); // important to compute the residual as residual = bnew - (Gdu + // CCTdu) and not as residual = (bnew - Gdu) - CCTdu - SCS(add_scaled_array) - (Gdu, CCTdu, u_dim, 1.0); - SCS(add_scaled_array) - (residual, Gdu, u_dim, -1.0); + SCS(add_scaled_array)(Gdu, CCTdu, u_dim, 1.0); + SCS(add_scaled_array)(residual, Gdu, u_dim, -1.0); } } @@ -354,8 +344,7 @@ scs_int log_cone_IPM(scs_float t0, scs_float v0, scs_float *x0, scs_float *u1, scale1 = 1 / scale1; t0 = t0 * scale1; v0 = v0 * scale1; - SCS(scale_array) - (x0, scale1, n); + SCS(scale_array)(x0, scale1, n); // ---------------------------------------------------------------------- // Parse workspace @@ -454,10 +443,8 @@ scs_int log_cone_IPM(scs_float t0, scs_float v0, scs_float *x0, scs_float *u1, // compute residuals and check termination criteria // -------------------------------------------------------- memcpy(rx, grad_f0, u_dim * sizeof(*grad_f0)); - SCS(scale_array) - (rx, z[0], u_dim); - SCS(add_scaled_array) - (rx, grad_f1, u_dim, z[1]); + SCS(scale_array)(rx, z[0], u_dim); + SCS(add_scaled_array)(rx, grad_f1, u_dim, z[1]); rx[1] -= z[2]; rx[n + 2] += 1; rznl[0] = f_u[0] + s[0]; @@ -503,11 +490,9 @@ scs_int log_cone_IPM(scs_float t0, scs_float v0, scs_float *x0, scs_float *u1, // precomputations for KKT system // ----------------------------------------------------------------- scs_float scale2 = 1 / w[0]; - SCS(scale_array) - (grad_f0, scale2, u_dim); + SCS(scale_array)(grad_f0, scale2, u_dim); scale2 = 1 / w[1]; - SCS(scale_array) - (grad_f1, scale2, u_dim); + SCS(scale_array)(grad_f1, scale2, u_dim); KKT_precompute(u1[1], x_inv, z, w, n, &KKT_work); scs_float rhs2_aff[] = {-lmbda[0] * lmbda[0], -lmbda[1] * lmbda[1], @@ -532,8 +517,7 @@ scs_int log_cone_IPM(scs_float t0, scs_float v0, scs_float *x0, scs_float *u1, // For i = 1 we compute the actual search direction. // ------------------------------------------------------------ if (i == 1 && variant == 0) { - SCS(scale_array) - (rhs1_aff, 1 - sigma, u_dim + 3); + SCS(scale_array)(rhs1_aff, 1 - sigma, u_dim + 3); rhs2_aff[0] += (sigma * mu - ds[0] * dz[0]); rhs2_aff[1] += (sigma * mu - ds[1] * dz[1]); rhs2_aff[2] += (sigma * mu - ds[2] * dz[2]); @@ -553,15 +537,12 @@ scs_int log_cone_IPM(scs_float t0, scs_float v0, scs_float *x0, scs_float *u1, while (backtrack) { // u_new = u + step * du, z_new = z + step * dz, s_new = s + step memcpy(u1_new, u1, u1_dim * sizeof(*u1)); - SCS(add_scaled_array) - (u1_new, du1, u1_dim, step_size); + SCS(add_scaled_array)(u1_new, du1, u1_dim, step_size); rnew = r + step_size * dr; memcpy(z_new, z, 3 * sizeof(*z)); - SCS(add_scaled_array) - (z_new, dz, m, step_size); + SCS(add_scaled_array)(z_new, dz, m, step_size); memcpy(s_new, s, 3 * sizeof(*s)); - SCS(add_scaled_array) - (s_new, ds, m, step_size); + SCS(add_scaled_array)(s_new, ds, m, step_size); // evaluate oracle in u_new for (scs_int i = 0; i < n; i++) { @@ -572,10 +553,8 @@ scs_int log_cone_IPM(scs_float t0, scs_float v0, scs_float *x0, scs_float *u1, // compute residuals and merit function memcpy(rx_new, grad_f0_new, u_dim * sizeof(*grad_f0_new)); - SCS(scale_array) - (rx_new, z_new[0], u_dim); - SCS(add_scaled_array) - (rx_new, grad_f1_new, u_dim, z_new[1]); + SCS(scale_array)(rx_new, z_new[0], u_dim); + SCS(add_scaled_array)(rx_new, grad_f1_new, u_dim, z_new[1]); rx_new[1] -= z_new[2]; rx_new[n + 2] += 1; rznl_new[0] = f_u_new[0] + s_new[0]; @@ -674,10 +653,8 @@ scs_int log_cone_IPM(scs_float t0, scs_float v0, scs_float *x0, scs_float *u1, // unscale solution scale1 = 1 / scale1; - SCS(scale_array) - (x0, scale1, n); - SCS(scale_array) - (u1, scale1, u1_dim); + SCS(scale_array)(x0, scale1, n); + SCS(scale_array)(u1, scale1, u1_dim); stats->iter = iter; return 0; } diff --git a/src/spectral_cones/logdeterminant/logdet_cone.c b/src/spectral_cones/logdeterminant/logdet_cone.c index 488c417b..1013bfac 100644 --- a/src/spectral_cones/logdeterminant/logdet_cone.c +++ b/src/spectral_cones/logdeterminant/logdet_cone.c @@ -53,8 +53,7 @@ scs_int SCS(proj_logdet_cone)(scs_float *tvX, scs_int n, ScsConeWork *c, #ifndef USE_LAPACK scs_printf("FAILURE: solving SDP but no blas/lapack libraries were found!\n"); scs_printf("SCS will return nonsense!\n"); - SCS(scale_array) - (X, NAN, n); + SCS(scale_array)(X, NAN, n); return -1; #endif @@ -83,14 +82,12 @@ scs_int SCS(proj_logdet_cone)(scs_float *tvX, scs_int n, ScsConeWork *c, } // rescale diags by sqrt(2) - BLAS(scal) - (&nb, &sqrt2, Xs, &nb_plus_one); + BLAS(scal)(&nb, &sqrt2, Xs, &nb_plus_one); // Eigendecomposition. On exit, the lower triangular part of Xs stores // the eigenvectors. The vector e stores the eigenvalues in ascending // order (smallest eigenvalue first) */ - BLAS(syev) - ("Vectors", "Lower", &nb, Xs, &nb, e, work, &lwork, &info); + BLAS(syev)("Vectors", "Lower", &nb, Xs, &nb, e, work, &lwork, &info); if (info != 0) { scs_printf("WARN: LAPACK syev error, info = %i\n", (int)info); if (info < 0) { @@ -129,14 +126,11 @@ scs_int SCS(proj_logdet_cone)(scs_float *tvX, scs_int n, ScsConeWork *c, for (i = 0; i < n; ++i) { assert(evals_proj[i] >= 0); sq_eig_pos = SQRTF(evals_proj[i]); - BLAS(scal) - (&nb, &sq_eig_pos, &Xs[i * n], &one_int); + BLAS(scal)(&nb, &sq_eig_pos, &Xs[i * n], &one_int); } - BLAS(syrk) - ("Lower", "NoTrans", &nb, &nb, &one, Xs, &nb, &zero, Z, &nb); - BLAS(scal) - (&nb, &sqrt2_inv, Z, &nb_plus_one); + BLAS(syrk)("Lower", "NoTrans", &nb, &nb, &one, Xs, &nb, &zero, Z, &nb); + BLAS(scal)(&nb, &sqrt2_inv, Z, &nb_plus_one); for (i = 0; i < n; ++i) { memcpy(&(X[i * n - ((i - 1) * i) / 2]), &(Z[i * (n + 1)]), diff --git a/src/spectral_cones/nuclear/ell1_cone.c b/src/spectral_cones/nuclear/ell1_cone.c index d769ff19..fd7b6b19 100644 --- a/src/spectral_cones/nuclear/ell1_cone.c +++ b/src/spectral_cones/nuclear/ell1_cone.c @@ -50,8 +50,7 @@ static void compute_cone_residuals_ell1(const scs_float *tx, scs_float t0, blas_int int_n = n; scs_float negOne = -1.0; blas_int one = 1; - BLAS(axpy) - (&int_n, &negOne, x0, &one, dualx, &one); + BLAS(axpy)(&int_n, &negOne, x0, &one, dualx, &one); // --------------------------------------- // Compute complementarity measure diff --git a/src/spectral_cones/nuclear/nuclear_cone.c b/src/spectral_cones/nuclear/nuclear_cone.c index 7e022687..83b7933d 100644 --- a/src/spectral_cones/nuclear/nuclear_cone.c +++ b/src/spectral_cones/nuclear/nuclear_cone.c @@ -62,8 +62,8 @@ scs_int SCS(proj_nuclear_cone)(scs_float *tX, scs_int m, scs_int n, int lwork = c->lwork_nuc; int info = 0; - BLAS(gesvd) - ("S", "A", &bm, &bn, X, &bm, s, u, &bm, vt, &bn, work, &lwork, &info); + BLAS(gesvd)("S", "A", &bm, &bn, X, &bm, s, u, &bm, vt, &bn, work, &lwork, + &info); if (info != 0) { printf("WARN: LAPACK gesvd error, info = %i\n", (int)info); if (info < 0) { @@ -86,15 +86,14 @@ scs_int SCS(proj_nuclear_cone)(scs_float *tX, scs_int m, scs_int n, // ------------------------------------------------------------------------- int one = 1; for (scs_int i = 0; i < n; ++i) { - BLAS(scal) - (&bm, &tX[i + 1], &u[i * m], &one); + BLAS(scal)(&bm, &tX[i + 1], &u[i * m], &one); } char trans = 'N'; scs_float alpha = 1.0; scs_float beta = 0.0; - BLAS(gemm) - (&trans, &trans, &bm, &bn, &bn, &alpha, u, &bm, vt, &bn, &beta, tX + 1, &bm); + BLAS(gemm)(&trans, &trans, &bm, &bn, &bn, &alpha, u, &bm, vt, &bn, &beta, + tX + 1, &bm); return 0; } diff --git a/src/spectral_cones/sum-largest/sum_largest_eval_cone.c b/src/spectral_cones/sum-largest/sum_largest_eval_cone.c index 77762e38..00126b1c 100644 --- a/src/spectral_cones/sum-largest/sum_largest_eval_cone.c +++ b/src/spectral_cones/sum-largest/sum_largest_eval_cone.c @@ -123,9 +123,8 @@ scs_int SCS(proj_sum_largest_evals)(scs_float *tX, scs_int n, scs_int k, scs_float zero = 0.0; // it is not safe to have overlapping matrices for dgemm_. - BLAS(gemm) - (&transN, &transY, &nb, &nb, &nb, &one, Xs, &nb, c->work_sum_of_largest, &nb, - &zero, Z, &nb); + BLAS(gemm)(&transN, &transY, &nb, &nb, &nb, &one, Xs, &nb, + c->work_sum_of_largest, &nb, &zero, Z, &nb); BLAS(scal)(&nb, &sqrt2_inv, Z, &nb_plus_one); diff --git a/test/rng.h b/test/rng.h index ec6bb79d..6faa2a44 100644 --- a/test/rng.h +++ b/test/rng.h @@ -52,14 +52,14 @@ long ran_arr_buf[QUALITY]; long ran_arr_dummy = -1, ran_arr_started = -1; long *ran_arr_ptr = &ran_arr_dummy; /* the next random number, or -1 */ -#define TT 70 /* guaranteed separation between streams */ -#define is_odd(x) ((x)&1) /* units bit of x */ +#define TT 70 /* guaranteed separation between streams */ +#define is_odd(x) ((x) & 1) /* units bit of x */ #ifdef __STDC__ void ran_start(long seed) #else -void ran_start(seed) /* do this before using ran_array */ - long seed; /* selector for different streams */ +void ran_start(seed) /* do this before using ran_array */ + long seed; /* selector for different streams */ #endif { register int t, j; diff --git a/test/run_tests.c b/test/run_tests.c index 7f09440b..5b2e82f5 100644 --- a/test/run_tests.c +++ b/test/run_tests.c @@ -33,9 +33,9 @@ _SKIP(test_validation) /* solve SDPs, requires blas / lapack */ #if defined(USE_LAPACK) && NO_READ_WRITE == 0 +#include "problems/complex_PSD.h" #include "problems/random_prob.h" #include "problems/rob_gauss_cov_est.h" -#include "problems/complex_PSD.h" #else _SKIP(rob_gauss_cov_est) _SKIP(random_prob) diff --git a/test/spectral_cones_problems/exp_design.h b/test/spectral_cones_problems/exp_design.h index d1c4efcb..9512ce77 100644 --- a/test/spectral_cones_problems/exp_design.h +++ b/test/spectral_cones_problems/exp_design.h @@ -135,8 +135,7 @@ static const char *exp_design(void) { scs_free(k); scs_free(stgs); scs_free(d); - SCS(free_sol) - (sol); + SCS(free_sol)(sol); return fail; } diff --git a/test/spectral_cones_problems/graph_partitioning.h b/test/spectral_cones_problems/graph_partitioning.h index 2a119dfc..cff8f512 100644 --- a/test/spectral_cones_problems/graph_partitioning.h +++ b/test/spectral_cones_problems/graph_partitioning.h @@ -269,8 +269,7 @@ static const char *graph_partitioning(void) { scs_free(k); scs_free(stgs); scs_free(d); - SCS(free_sol) - (sol); + SCS(free_sol)(sol); return fail; } diff --git a/test/spectral_cones_problems/robust_pca.h b/test/spectral_cones_problems/robust_pca.h index 485e337b..189b00ac 100644 --- a/test/spectral_cones_problems/robust_pca.h +++ b/test/spectral_cones_problems/robust_pca.h @@ -247,8 +247,7 @@ static const char *robust_pca(void) { scs_free(k); scs_free(stgs); scs_free(d); - SCS(free_sol) - (sol); + SCS(free_sol)(sol); return fail; } diff --git a/test/spectral_cones_problems/several_logdet_cones.h b/test/spectral_cones_problems/several_logdet_cones.h index 2ba28e41..703248df 100644 --- a/test/spectral_cones_problems/several_logdet_cones.h +++ b/test/spectral_cones_problems/several_logdet_cones.h @@ -200,21 +200,23 @@ static const char *several_logdet_cones(void) { fail = 0; // TODO: This test fails because of the complementary slackness check. // The complementary slackness tolerance is a bit too tight. - //fail = verify_solution_correct(d, k, stgs, &info, sol, exitflag); - //if (fail) + // fail = verify_solution_correct(d, k, stgs, &info, sol, exitflag); + // if (fail) // return fail; - mu_assert("several logdet cones: primal feas error ", ABS(info.res_pri) < 5 * 1e-6); - mu_assert("several logdet cones: dual feas error ", ABS(info.res_dual) < 1e-6); - mu_assert("several logdet cones: duality gap error ", ABS(info.gap) < 5 * 1e-6); + mu_assert("several logdet cones: primal feas error ", + ABS(info.res_pri) < 5 * 1e-6); + mu_assert("several logdet cones: dual feas error ", + ABS(info.res_dual) < 1e-6); + mu_assert("several logdet cones: duality gap error ", + ABS(info.gap) < 5 * 1e-6); /* kill data */ scs_free(d->A); scs_free(k); scs_free(stgs); scs_free(d); - SCS(free_sol) - (sol); + SCS(free_sol)(sol); return fail; } diff --git a/test/spectral_cones_problems/several_nuc_cone.h b/test/spectral_cones_problems/several_nuc_cone.h index 94763531..043b1a15 100644 --- a/test/spectral_cones_problems/several_nuc_cone.h +++ b/test/spectral_cones_problems/several_nuc_cone.h @@ -279,8 +279,7 @@ static const char *several_nuc_cone(void) { scs_free(k); scs_free(stgs); scs_free(d); - SCS(free_sol) - (sol); + SCS(free_sol)(sol); return fail; } diff --git a/test/spectral_cones_problems/several_sum_largest.h b/test/spectral_cones_problems/several_sum_largest.h index d0903fd9..23de5ee0 100644 --- a/test/spectral_cones_problems/several_sum_largest.h +++ b/test/spectral_cones_problems/several_sum_largest.h @@ -414,8 +414,7 @@ static const char *several_sum_largest(void) { scs_free(k); scs_free(stgs); scs_free(d); - SCS(free_sol) - (sol); + SCS(free_sol)(sol); return fail; } From 4d195692a1884f1c30d857652e74bb14e8416250 Mon Sep 17 00:00:00 2001 From: Brendan O'Donoghue Date: Sun, 10 Aug 2025 15:12:47 +0100 Subject: [PATCH 3/3] attempt to fix pointer type mismatch --- src/spectral_cones/logdeterminant/log_cone_IPM.c | 2 +- src/spectral_cones/logdeterminant/log_cone_wrapper.c | 3 ++- src/spectral_cones/sum-largest/sum_largest_cone.c | 3 ++- src/spectral_cones/sum-largest/sum_largest_eval_cone.c | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/spectral_cones/logdeterminant/log_cone_IPM.c b/src/spectral_cones/logdeterminant/log_cone_IPM.c index 00a1f44f..1aea41f1 100644 --- a/src/spectral_cones/logdeterminant/log_cone_IPM.c +++ b/src/spectral_cones/logdeterminant/log_cone_IPM.c @@ -199,7 +199,7 @@ static void KKT_solve(const scs_float *z, const scs_float *w, const scs_float *rhs2, KKT_solver_workspace *KKT_work, scs_float *du1, scs_float *dr, scs_float *dz, scs_float *ds) { - scs_int u_dim = n + 3; + blas_int u_dim = n + 3; scs_int u1_dim = n + 2; scs_float *temp1 = KKT_work->temp1; diff --git a/src/spectral_cones/logdeterminant/log_cone_wrapper.c b/src/spectral_cones/logdeterminant/log_cone_wrapper.c index caf88f1c..b7c7c3f3 100644 --- a/src/spectral_cones/logdeterminant/log_cone_wrapper.c +++ b/src/spectral_cones/logdeterminant/log_cone_wrapper.c @@ -201,4 +201,5 @@ static void check_opt_cond_log_cone(const scs_float *tvx, scs_float t0, residuals[1] = pri_res / MAX(pri_norm, 1.0); double scale = MAX(pri_norm, 1.0); residuals[2] = complementarity / MAX(scale, dual_norm); -} \ No newline at end of file +} + diff --git a/src/spectral_cones/sum-largest/sum_largest_cone.c b/src/spectral_cones/sum-largest/sum_largest_cone.c index 2575c54c..bef4ec36 100644 --- a/src/spectral_cones/sum-largest/sum_largest_cone.c +++ b/src/spectral_cones/sum-largest/sum_largest_cone.c @@ -192,4 +192,5 @@ static void compute_cone_residuals(scs_float t, const scs_float *x, scs_free(lmbda_x); } -#endif \ No newline at end of file +#endif + diff --git a/src/spectral_cones/sum-largest/sum_largest_eval_cone.c b/src/spectral_cones/sum-largest/sum_largest_eval_cone.c index 00126b1c..557a9c2e 100644 --- a/src/spectral_cones/sum-largest/sum_largest_eval_cone.c +++ b/src/spectral_cones/sum-largest/sum_largest_eval_cone.c @@ -136,4 +136,5 @@ scs_int SCS(proj_sum_largest_evals)(scs_float *tX, scs_int n, scs_int k, tX[0] *= sqrt2_inv; return 0; -} \ No newline at end of file +} +