Skip to content

Commit 785287c

Browse files
authored
Fix memory corruptions around pg_dist_node accessors after a Citus downgrade is followed by an upgrade (#8144)
Unlike what has been fixed in #7950, #8120, #8124, #8121 and #8114, this was not an issue in older releases but is a potential issue to be introduced by the current (13.2) release because in one of recent commits (#8122) two columns has been added to pg_dist_node. In other words, none of the older releases since we started supporting downgrades added new columns to pg_dist_node. The mentioned PR actually attempted avoiding these kind of issues in one of the code-paths but not in some others. So, this PR, avoids memory corruptions around pg_dist_node accessors in a standardized way (as implemented in other example PRs) and in all code-paths.
1 parent 86b5bc6 commit 785287c

File tree

1 file changed

+116
-66
lines changed

1 file changed

+116
-66
lines changed

src/backend/distributed/metadata/node_metadata.c

Lines changed: 116 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ static bool NodeIsLocal(WorkerNode *worker);
115115
static void SetLockTimeoutLocally(int32 lock_cooldown);
116116
static void UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort,
117117
bool localOnly);
118+
static int GetNodePrimaryNodeIdAttrIndexInPgDistNode(TupleDesc tupleDesc);
119+
static int GetNodeIsCloneAttrIndexInPgDistNode(TupleDesc tupleDesc);
118120
static bool UnsetMetadataSyncedForAllWorkers(void);
119121
static char * GetMetadataSyncCommandToSetNodeColumn(WorkerNode *workerNode,
120122
int columnIndex,
@@ -1196,16 +1198,25 @@ ActivateNodeList(MetadataSyncContext *context)
11961198
void
11971199
ActivateCloneNodeAsPrimary(WorkerNode *workerNode)
11981200
{
1201+
Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock);
1202+
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
1203+
TupleDesc copiedTupleDescriptor = CreateTupleDescCopy(tupleDescriptor);
1204+
table_close(pgDistNode, AccessShareLock);
1205+
11991206
/*
12001207
* Set the node as primary and active.
12011208
*/
12021209
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_noderole,
12031210
ObjectIdGetDatum(PrimaryNodeRoleId()));
12041211
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_isactive,
12051212
BoolGetDatum(true));
1206-
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_nodeisclone,
1213+
SetWorkerColumnLocalOnly(workerNode,
1214+
GetNodeIsCloneAttrIndexInPgDistNode(copiedTupleDescriptor) +
1215+
1,
12071216
BoolGetDatum(false));
1208-
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_nodeprimarynodeid,
1217+
SetWorkerColumnLocalOnly(workerNode,
1218+
GetNodePrimaryNodeIdAttrIndexInPgDistNode(
1219+
copiedTupleDescriptor) + 1,
12091220
Int32GetDatum(0));
12101221
SetWorkerColumnLocalOnly(workerNode, Anum_pg_dist_node_hasmetadata,
12111222
BoolGetDatum(true));
@@ -1779,14 +1790,14 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool loca
17791790
{
17801791
const bool indexOK = true;
17811792

1782-
ScanKeyData scanKey[1];
1783-
Datum values[Natts_pg_dist_node];
1784-
bool isnull[Natts_pg_dist_node];
1785-
bool replace[Natts_pg_dist_node];
1786-
17871793
Relation pgDistNode = table_open(DistNodeRelationId(), RowExclusiveLock);
17881794
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
17891795

1796+
ScanKeyData scanKey[1];
1797+
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
1798+
bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
1799+
bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));
1800+
17901801
ScanKeyInit(&scanKey[0], Anum_pg_dist_node_nodeid,
17911802
BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(nodeId));
17921803

@@ -1801,8 +1812,6 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool loca
18011812
newNodeName, newNodePort)));
18021813
}
18031814

1804-
memset(replace, 0, sizeof(replace));
1805-
18061815
values[Anum_pg_dist_node_nodeport - 1] = Int32GetDatum(newNodePort);
18071816
isnull[Anum_pg_dist_node_nodeport - 1] = false;
18081817
replace[Anum_pg_dist_node_nodeport - 1] = true;
@@ -1835,6 +1844,10 @@ UpdateNodeLocation(int32 nodeId, char *newNodeName, int32 newNodePort, bool loca
18351844

18361845
systable_endscan(scanDescriptor);
18371846
table_close(pgDistNode, NoLock);
1847+
1848+
pfree(values);
1849+
pfree(isnull);
1850+
pfree(replace);
18381851
}
18391852

18401853

@@ -2105,11 +2118,10 @@ citus_internal_mark_node_not_synced(PG_FUNCTION_ARGS)
21052118
Relation pgDistNode = table_open(DistNodeRelationId(), AccessShareLock);
21062119
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
21072120

2108-
Datum values[Natts_pg_dist_node];
2109-
bool isnull[Natts_pg_dist_node];
2110-
bool replace[Natts_pg_dist_node];
2121+
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
2122+
bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
2123+
bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));
21112124

2112-
memset(replace, 0, sizeof(replace));
21132125
values[Anum_pg_dist_node_metadatasynced - 1] = DatumGetBool(false);
21142126
isnull[Anum_pg_dist_node_metadatasynced - 1] = false;
21152127
replace[Anum_pg_dist_node_metadatasynced - 1] = true;
@@ -2123,6 +2135,10 @@ citus_internal_mark_node_not_synced(PG_FUNCTION_ARGS)
21232135

21242136
table_close(pgDistNode, NoLock);
21252137

2138+
pfree(values);
2139+
pfree(isnull);
2140+
pfree(replace);
2141+
21262142
PG_RETURN_VOID();
21272143
}
21282144

@@ -2831,17 +2847,16 @@ SetWorkerColumnLocalOnly(WorkerNode *workerNode, int columnIndex, Datum value)
28312847
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
28322848
HeapTuple heapTuple = GetNodeTuple(workerNode->workerName, workerNode->workerPort);
28332849

2834-
Datum values[Natts_pg_dist_node];
2835-
bool isnull[Natts_pg_dist_node];
2836-
bool replace[Natts_pg_dist_node];
2850+
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
2851+
bool *isnull = palloc0(tupleDescriptor->natts * sizeof(bool));
2852+
bool *replace = palloc0(tupleDescriptor->natts * sizeof(bool));
28372853

28382854
if (heapTuple == NULL)
28392855
{
28402856
ereport(ERROR, (errmsg("could not find valid entry for node \"%s:%d\"",
28412857
workerNode->workerName, workerNode->workerPort)));
28422858
}
28432859

2844-
memset(replace, 0, sizeof(replace));
28452860
values[columnIndex - 1] = value;
28462861
isnull[columnIndex - 1] = false;
28472862
replace[columnIndex - 1] = true;
@@ -2857,6 +2872,10 @@ SetWorkerColumnLocalOnly(WorkerNode *workerNode, int columnIndex, Datum value)
28572872

28582873
table_close(pgDistNode, NoLock);
28592874

2875+
pfree(values);
2876+
pfree(isnull);
2877+
pfree(replace);
2878+
28602879
return newWorkerNode;
28612880
}
28622881

@@ -3241,16 +3260,15 @@ InsertPlaceholderCoordinatorRecord(void)
32413260
static void
32423261
InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, NodeMetadata *nodeMetadata)
32433262
{
3244-
Datum values[Natts_pg_dist_node];
3245-
bool isNulls[Natts_pg_dist_node];
3263+
Relation pgDistNode = table_open(DistNodeRelationId(), RowExclusiveLock);
3264+
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
3265+
3266+
Datum *values = palloc0(tupleDescriptor->natts * sizeof(Datum));
3267+
bool *isNulls = palloc0(tupleDescriptor->natts * sizeof(bool));
32463268

32473269
Datum nodeClusterStringDatum = CStringGetDatum(nodeMetadata->nodeCluster);
32483270
Datum nodeClusterNameDatum = DirectFunctionCall1(namein, nodeClusterStringDatum);
32493271

3250-
/* form new shard tuple */
3251-
memset(values, 0, sizeof(values));
3252-
memset(isNulls, false, sizeof(isNulls));
3253-
32543272
values[Anum_pg_dist_node_nodeid - 1] = UInt32GetDatum(nodeid);
32553273
values[Anum_pg_dist_node_groupid - 1] = Int32GetDatum(nodeMetadata->groupId);
32563274
values[Anum_pg_dist_node_nodename - 1] = CStringGetTextDatum(nodeName);
@@ -3264,14 +3282,10 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, NodeMetadata *nodeMeta
32643282
values[Anum_pg_dist_node_nodecluster - 1] = nodeClusterNameDatum;
32653283
values[Anum_pg_dist_node_shouldhaveshards - 1] = BoolGetDatum(
32663284
nodeMetadata->shouldHaveShards);
3267-
values[Anum_pg_dist_node_nodeisclone - 1] = BoolGetDatum(
3268-
nodeMetadata->nodeisclone);
3269-
values[Anum_pg_dist_node_nodeprimarynodeid - 1] = Int32GetDatum(
3270-
nodeMetadata->nodeprimarynodeid);
3271-
3272-
Relation pgDistNode = table_open(DistNodeRelationId(), RowExclusiveLock);
3273-
3274-
TupleDesc tupleDescriptor = RelationGetDescr(pgDistNode);
3285+
values[GetNodeIsCloneAttrIndexInPgDistNode(tupleDescriptor)] =
3286+
BoolGetDatum(nodeMetadata->nodeisclone);
3287+
values[GetNodePrimaryNodeIdAttrIndexInPgDistNode(tupleDescriptor)] =
3288+
Int32GetDatum(nodeMetadata->nodeprimarynodeid);
32753289
HeapTuple heapTuple = heap_form_tuple(tupleDescriptor, values, isNulls);
32763290

32773291
CATALOG_INSERT_WITH_SNAPSHOT(pgDistNode, heapTuple);
@@ -3283,6 +3297,9 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, NodeMetadata *nodeMeta
32833297

32843298
/* close relation */
32853299
table_close(pgDistNode, NoLock);
3300+
3301+
pfree(values);
3302+
pfree(isNulls);
32863303
}
32873304

32883305

@@ -3397,43 +3414,30 @@ TupleToWorkerNode(Relation pgDistNode, TupleDesc tupleDescriptor, HeapTuple heap
33973414
1]);
33983415

33993416
/*
3400-
* Attributes above this line are guaranteed to be present at the
3401-
* exact defined attribute number. Atleast till now. If you are droping or
3402-
* adding any of the above columns consider adjusting the code above
3417+
* nodecluster, nodeisclone and nodeprimarynodeid columns can be missing. In case
3418+
* of extension creation/upgrade, master_initialize_node_metadata function is
3419+
* called before the nodecluster column is added to pg_dist_node table.
34033420
*/
3404-
Oid pgDistNodeRelId = RelationGetRelid(pgDistNode);
3405-
3406-
AttrNumber nodeClusterAttno = get_attnum(pgDistNodeRelId, "nodecluster");
34073421

3408-
if (nodeClusterAttno > 0 &&
3409-
!TupleDescAttr(tupleDescriptor, nodeClusterAttno - 1)->attisdropped &&
3410-
!isNullArray[nodeClusterAttno - 1])
3422+
if (!isNullArray[Anum_pg_dist_node_nodecluster - 1])
34113423
{
34123424
Name nodeClusterName =
3413-
DatumGetName(datumArray[nodeClusterAttno - 1]);
3425+
DatumGetName(datumArray[Anum_pg_dist_node_nodecluster - 1]);
34143426
char *nodeClusterString = NameStr(*nodeClusterName);
34153427
strlcpy(workerNode->nodeCluster, nodeClusterString, NAMEDATALEN);
34163428
}
34173429

3418-
if (nAtts > Anum_pg_dist_node_nodeisclone)
3430+
int nodeIsCloneIdx = GetNodeIsCloneAttrIndexInPgDistNode(tupleDescriptor);
3431+
int nodePrimaryNodeIdIdx = GetNodePrimaryNodeIdAttrIndexInPgDistNode(tupleDescriptor);
3432+
3433+
if (!isNullArray[nodeIsCloneIdx])
34193434
{
3420-
AttrNumber nodeIsCloneAttno = get_attnum(pgDistNodeRelId, "nodeisclone");
3421-
if (nodeIsCloneAttno > 0 &&
3422-
!TupleDescAttr(tupleDescriptor, nodeIsCloneAttno - 1)->attisdropped &&
3423-
!isNullArray[nodeIsCloneAttno - 1])
3424-
{
3425-
workerNode->nodeisclone = DatumGetBool(datumArray[nodeIsCloneAttno - 1]);
3426-
}
3427-
AttrNumber nodePrimaryNodeIdAttno = get_attnum(pgDistNodeRelId,
3428-
"nodeprimarynodeid");
3429-
if (nodePrimaryNodeIdAttno > 0 &&
3430-
!TupleDescAttr(tupleDescriptor, nodePrimaryNodeIdAttno - 1)->attisdropped &&
3431-
!isNullArray[nodePrimaryNodeIdAttno - 1])
3432-
{
3433-
workerNode->nodeprimarynodeid = DatumGetInt32(datumArray[
3434-
nodePrimaryNodeIdAttno - 1])
3435-
;
3436-
}
3435+
workerNode->nodeisclone = DatumGetBool(datumArray[nodeIsCloneIdx]);
3436+
}
3437+
3438+
if (!isNullArray[nodePrimaryNodeIdIdx])
3439+
{
3440+
workerNode->nodeprimarynodeid = DatumGetInt32(datumArray[nodePrimaryNodeIdIdx]);
34373441
}
34383442

34393443
pfree(datumArray);
@@ -3443,6 +3447,48 @@ TupleToWorkerNode(Relation pgDistNode, TupleDesc tupleDescriptor, HeapTuple heap
34433447
}
34443448

34453449

3450+
/*
3451+
* GetNodePrimaryNodeIdAttrIndexInPgDistNode returns attrnum for nodeprimarynodeid attr.
3452+
*
3453+
* nodeprimarynodeid attr was added to table pg_dist_node using alter operation
3454+
* after the version where Citus started supporting downgrades, and it's one of
3455+
* the two columns that we've introduced to pg_dist_node since then.
3456+
*
3457+
* And in case of a downgrade + upgrade, tupleDesc->natts becomes greater than
3458+
* Natts_pg_dist_node and when this happens, then we know that attrnum
3459+
* nodeprimarynodeid is not Anum_pg_dist_node_nodeprimarynodeid anymore but
3460+
* tupleDesc->natts - 1.
3461+
*/
3462+
static int
3463+
GetNodePrimaryNodeIdAttrIndexInPgDistNode(TupleDesc tupleDesc)
3464+
{
3465+
return tupleDesc->natts == Natts_pg_dist_node
3466+
? (Anum_pg_dist_node_nodeprimarynodeid - 1)
3467+
: tupleDesc->natts - 1;
3468+
}
3469+
3470+
3471+
/*
3472+
* GetNodeIsCloneAttrIndexInPgDistNode returns attrnum for nodeisclone attr.
3473+
*
3474+
* Like, GetNodePrimaryNodeIdAttrIndexInPgDistNode(), performs a similar
3475+
* calculation for nodeisclone attribute because this is column added to
3476+
* pg_dist_node after we started supporting downgrades.
3477+
*
3478+
* Only difference with the mentioned function is that we know
3479+
* the attrnum for nodeisclone is not Anum_pg_dist_node_nodeisclone anymore
3480+
* but tupleDesc->natts - 2 because we added these columns consecutively
3481+
* and we first add nodeisclone attribute and then nodeprimarynodeid attribute.
3482+
*/
3483+
static int
3484+
GetNodeIsCloneAttrIndexInPgDistNode(TupleDesc tupleDesc)
3485+
{
3486+
return tupleDesc->natts == Natts_pg_dist_node
3487+
? (Anum_pg_dist_node_nodeisclone - 1)
3488+
: tupleDesc->natts - 2;
3489+
}
3490+
3491+
34463492
/*
34473493
* StringToDatum transforms a string representation into a Datum.
34483494
*/
@@ -3519,15 +3565,15 @@ UnsetMetadataSyncedForAllWorkers(void)
35193565
updatedAtLeastOne = true;
35203566
}
35213567

3568+
Datum *values = palloc(tupleDescriptor->natts * sizeof(Datum));
3569+
bool *isnull = palloc(tupleDescriptor->natts * sizeof(bool));
3570+
bool *replace = palloc(tupleDescriptor->natts * sizeof(bool));
3571+
35223572
while (HeapTupleIsValid(heapTuple))
35233573
{
3524-
Datum values[Natts_pg_dist_node];
3525-
bool isnull[Natts_pg_dist_node];
3526-
bool replace[Natts_pg_dist_node];
3527-
3528-
memset(replace, false, sizeof(replace));
3529-
memset(isnull, false, sizeof(isnull));
3530-
memset(values, 0, sizeof(values));
3574+
memset(values, 0, tupleDescriptor->natts * sizeof(Datum));
3575+
memset(isnull, 0, tupleDescriptor->natts * sizeof(bool));
3576+
memset(replace, 0, tupleDescriptor->natts * sizeof(bool));
35313577

35323578
values[Anum_pg_dist_node_metadatasynced - 1] = BoolGetDatum(false);
35333579
replace[Anum_pg_dist_node_metadatasynced - 1] = true;
@@ -3550,6 +3596,10 @@ UnsetMetadataSyncedForAllWorkers(void)
35503596
CatalogCloseIndexes(indstate);
35513597
table_close(relation, NoLock);
35523598

3599+
pfree(values);
3600+
pfree(isnull);
3601+
pfree(replace);
3602+
35533603
return updatedAtLeastOne;
35543604
}
35553605

0 commit comments

Comments
 (0)