SI疾病传播模型实现
程序员文章站
2022-07-14 13:58:28
...
在SI疾病传播模型中,网络中的节点在任一时刻有两种可能的状态:易感态susceptible(S)和感染态infected(I)。处于易感态(S)的节点被感染后转变为感染态(I),并且不能恢复。我们假设在t_0时刻网络中只有一个节点被感染,这个节点就是传播源,其余所有节点都处于易感态。之后传播源以一定的疾病传播概率(rate of infection)感染它的邻居,疾病或信息由此开始在网络中传播。
SI传播模型的C语言实现(完整代码见下)
@sourceNode为疾病的传播源
@rateOfInfection为疾病的传播概率
思路:从传播源开始,对于该传播源的所有邻居节点,以一定的概率将疾病传染给这些邻居节点(传播时随机选择邻居,而不是按照固定的顺序,例如节点的id)。然后统计网络中所有已被感染的节点,以随机的顺序依次选择其中的节点作为“传播源”,尝试将疾病传染给它尚未被感染的邻居(选择邻居进行传染时同样是随机挑选)。重复这一过程,直到网络中所有节点都被感染。
/*
 * Run the SI (susceptible -> infected) spreading process on the global network.
 *
 * sourceNode      - 1-based id of the node that is infected at the start.
 * rateOfInfection - probability that one infection attempt on a susceptible
 *                   neighbour succeeds.
 *
 * Results are stored in infected_sequences[]:
 *   S_or_I - 1 once the node is infected,
 *   times  - step at which the node was infected (the source gets 1),
 *   from   - id of the infecting node (0 for the source).
 *
 * In every step the currently infected nodes are visited in random order and
 * each one tries, in random order, to infect each of its susceptible
 * neighbours once.  The loop ends when every node is infected, or when no
 * infected node has a susceptible neighbour left (disconnected network), so
 * it can no longer spin forever.
 */
void SI(int sourceNode, double rateOfInfection)
{
    int i, j;
    int step = 1;
    int *infected_ids;
    int *susceptible_ids;

    if (sourceNode < 1 || sourceNode > network_size) {
        fprintf(stderr, "SI: source node %d is outside the range 1..%d\n",
                sourceNode, network_size);
        exit(EXIT_FAILURE);
    }

    infected_sequences[sourceNode].S_or_I = 1;
    infected_sequences[sourceNode].times = 1;
    infected_sequences[sourceNode].from = 0;

    /* Draws below lie in [0, 0.999]; a rate <= 0 (or NaN) can never infect. */
    if (!(rateOfInfection > 0.0))
        return;

    /* Both work lists hold at most network_size ids; allocate them once. */
    infected_ids = malloc(sizeof *infected_ids * (size_t)network_size);
    if (infected_ids == NULL) {
        fprintf(stderr, "malloc I_node_array* error\n");
        exit(EXIT_FAILURE);
    }
    susceptible_ids = malloc(sizeof *susceptible_ids * (size_t)network_size);
    if (susceptible_ids == NULL) {
        fprintf(stderr, "malloc S_neighbor_array* error\n");
        free(infected_ids);
        exit(EXIT_FAILURE);
    }

    for (;;) {
        int infected_count = 0;
        int infected_left;
        int total_infected = 0;
        int spread_possible = 0;

        step++; /* infection time of nodes infected during this step */

        /* Snapshot of the nodes that are infected when the step begins. */
        for (i = 1; i <= network_size; i++)
            if (infected_sequences[i].S_or_I)
                infected_ids[infected_count++] = i;

        infected_left = infected_count;
        for (i = 0; i < infected_count; i++) {
            /* Draw a not-yet-visited spreader and drop it from the pool. */
            int pick = rand() % infected_left;
            int spreader = infected_ids[pick];
            int susceptible_count = 0;
            int susceptible_left;

            infected_ids[pick] = infected_ids[--infected_left];

            for (j = 1; j <= network_size; j++)
                if (adjacentMatrix[spreader][j] && !infected_sequences[j].S_or_I)
                    susceptible_ids[susceptible_count++] = j;

            if (susceptible_count == 0)
                continue;
            spread_possible = 1;

            susceptible_left = susceptible_count;
            for (j = 0; j < susceptible_count; j++) {
                /* Draw a not-yet-tried susceptible neighbour. */
                int choice = rand() % susceptible_left;
                int target = susceptible_ids[choice];
                double draw;

                susceptible_ids[choice] = susceptible_ids[--susceptible_left];

                draw = (double)(rand() % 1000) / 1000.0;
                if (draw < rateOfInfection) {
                    infected_sequences[target].S_or_I = 1;
                    infected_sequences[target].times = step;
                    infected_sequences[target].from = spreader;
                }
            }
        }

        for (i = 1; i <= network_size; i++)
            if (infected_sequences[i].S_or_I)
                total_infected++;

        /* Done when everyone is infected or nothing more can be reached. */
        if (total_infected == network_size || !spread_possible)
            break;
    }

    free(susceptible_ids);
    free(infected_ids);
}
使用gephi进行可视化
#include<errno.h>
#include<limits.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>
int network_size;
int edges_size;
short** adjacentMatrix;
double rateOfInfection;
char filename[100];
/* Per-node record of the outcome of the SI spreading process. */
typedef struct {
    /* Marks whether the node has been infected. */
    int S_or_I;
    /* Time at which the node was infected. */
    int times;
    /* Node from which the infection was received. */
    int from;
} SI_Node;
/* Per-node infection records, allocated by init() with network_size + 1 entries
 * and indexed by node id. */
SI_Node* infected_sequences;
/* Allocates and zeroes adjacentMatrix and infected_sequences. */
void init();
/* Reads edges_size "source target" pairs from filename into adjacentMatrix. */
void readNetworkAndTransformFormat();
/* Runs the SI spreading process starting from sourceNode. */
void SI(int sourceNode, double rateOfInfection);
/* Writes nodeInfo.csv and edgeInfo.csv from the simulation result. */
void saveSIResult();
/* Parse text as a complete decimal int; returns 1 on success, 0 on any error. */
static int parse_int_arg(const char *text, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < INT_MIN || value > INT_MAX)
        return 0;
    *out = (int)value;
    return 1;
}

/*
 * Entry point.  Expects five arguments:
 *   1. network size (number of vertices)
 *   2. number of edges in the edge file
 *   3. propagation probability, in [0, 1]
 *   4. name of the file holding the "source target" edge list
 *   5. id of the source node, in 1..network size
 * Every argument is validated before any memory is allocated; invalid input
 * ends the program with a failure status.
 */
int main(int argc, char** argv)
{
    int sourceID = 0;
    int copied;
    char *rate_end = NULL;

    if (argc != 6) {
        fprintf(stderr, "This algorithm requires 5 parameters\n");
        fprintf(stderr, "\t1.network size(vertices number)\n");
        fprintf(stderr, "\t2.edges size\n");
        fprintf(stderr, "\t3.propagation probability\n");
        fprintf(stderr, "\t4.file name contain edges information\n");
        fprintf(stderr, "\t5.the source node ID\n");
        fprintf(stderr, "\t\texample: a.exe 332 2126 0.5 edgesForSIModel.data 255\n");
        exit(EXIT_FAILURE);
    }

    srand((unsigned)time(NULL));

    if (!parse_int_arg(argv[1], &network_size) || network_size < 1) {
        fprintf(stderr, "invalid network size: %s\n", argv[1]);
        exit(EXIT_FAILURE);
    }
    if (!parse_int_arg(argv[2], &edges_size) || edges_size < 0) {
        fprintf(stderr, "invalid edges size: %s\n", argv[2]);
        exit(EXIT_FAILURE);
    }

    rateOfInfection = strtod(argv[3], &rate_end);
    /* The negated form also rejects NaN. */
    if (rate_end == argv[3] || *rate_end != '\0' ||
        !(rateOfInfection >= 0.0 && rateOfInfection <= 1.0)) {
        fprintf(stderr, "invalid propagation probability: %s\n", argv[3]);
        exit(EXIT_FAILURE);
    }

    /* Bounded copy: the original strcat could overflow the 100-byte buffer. */
    copied = snprintf(filename, sizeof filename, "%s", argv[4]);
    if (copied < 0 || (size_t)copied >= sizeof filename) {
        fprintf(stderr, "file name too long: %s\n", argv[4]);
        exit(EXIT_FAILURE);
    }

    /* The source id indexes arrays of size network_size + 1. */
    if (!parse_int_arg(argv[5], &sourceID) || sourceID < 1 || sourceID > network_size) {
        fprintf(stderr, "invalid source node ID: %s\n", argv[5]);
        exit(EXIT_FAILURE);
    }

    printf("show information of input: network_size: %d,edges number: %d,propagation probability: %f,file name: %s, source node ID: %d\n", network_size, edges_size, rateOfInfection, filename, sourceID);

    init();
    readNetworkAndTransformFormat();
    SI(sourceID, rateOfInfection);
    saveSIResult();
    return 0;
}
/*
 * Allocate the global adjacency matrix and the per-node infection records.
 *
 * Both are sized network_size + 1 so that node ids 1..network_size can be used
 * directly as indices.  Everything, including row/column/entry 0, is
 * zero-initialised, so no element of either structure is left indeterminate.
 * Exits with a failure status when network_size is not positive or an
 * allocation fails.
 */
void init()
{
    int i;
    size_t side;

    if (network_size < 1) {
        fprintf(stderr, "init: network_size must be positive, got %d\n", network_size);
        exit(EXIT_FAILURE);
    }
    side = (size_t)network_size + 1;

    adjacentMatrix = malloc(side * sizeof *adjacentMatrix);
    if (adjacentMatrix == NULL) {
        fprintf(stderr, "adjacentMatrix** malloc error");
        exit(EXIT_FAILURE);
    }

    /* Row 0 is allocated too, so every index in 0..network_size is valid. */
    for (i = 0; i <= network_size; i++) {
        adjacentMatrix[i] = calloc(side, sizeof *adjacentMatrix[i]);
        if (adjacentMatrix[i] == NULL) {
            /* The original format string had %d but passed no argument. */
            fprintf(stderr, "adjacentMatrix[%d]* malloc error", i);
            exit(EXIT_FAILURE);
        }
    }

    /* calloc leaves S_or_I, times and from at 0 for every node. */
    infected_sequences = calloc(side, sizeof *infected_sequences);
    if (infected_sequences == NULL) {
        fprintf(stderr, "infected_sequences* malloc error");
        exit(EXIT_FAILURE);
    }
}
/*
* 大多数的真实网络给出的形式为source target,在这里进行转化为邻接矩阵
* */
void readNetworkAndTransformFormat()
{
int i, j, source, target;
source = target = 0;
FILE* fread;
if( NULL == (fread = fopen(filename, "r")) )
{
printf("open file error");
exit(0);
}
for( i = 1; i <= edges_size; i++ )
{
if( 2 != fscanf(fread, "%d %d", &source, &target) )
{
printf("fscanf error: %d", i);
exit(0);
}
adjacentMatrix[source][target] = adjacentMatrix[target][source] = 1;
}
fclose(fread);
/*
for( i = 1; i <= network_size; i++ )
{
for( j = 1; j <= network_size; j++ )
printf("%d ", adjacentMatrix[i][j]);
printf("\n");
}
*/
}
/*
 * Run the SI (susceptible -> infected) spreading process on the global network.
 *
 * sourceNode      - 1-based id of the node that is infected at the start.
 * rateOfInfection - probability that one infection attempt on a susceptible
 *                   neighbour succeeds.
 *
 * Results are stored in infected_sequences[]:
 *   S_or_I - 1 once the node is infected,
 *   times  - step at which the node was infected (the source gets 1),
 *   from   - id of the infecting node (0 for the source).
 *
 * In every step the currently infected nodes are visited in random order and
 * each one tries, in random order, to infect each of its susceptible
 * neighbours once.  The loop ends when every node is infected, or when no
 * infected node has a susceptible neighbour left (disconnected network), so
 * it can no longer spin forever.
 */
void SI(int sourceNode, double rateOfInfection)
{
    int i, j;
    int step = 1;
    int *infected_ids;
    int *susceptible_ids;

    if (sourceNode < 1 || sourceNode > network_size) {
        fprintf(stderr, "SI: source node %d is outside the range 1..%d\n",
                sourceNode, network_size);
        exit(EXIT_FAILURE);
    }

    infected_sequences[sourceNode].S_or_I = 1;
    infected_sequences[sourceNode].times = 1;
    infected_sequences[sourceNode].from = 0;

    /* Draws below lie in [0, 0.999]; a rate <= 0 (or NaN) can never infect. */
    if (!(rateOfInfection > 0.0))
        return;

    /* Both work lists hold at most network_size ids; allocate them once. */
    infected_ids = malloc(sizeof *infected_ids * (size_t)network_size);
    if (infected_ids == NULL) {
        fprintf(stderr, "malloc I_node_array* error\n");
        exit(EXIT_FAILURE);
    }
    susceptible_ids = malloc(sizeof *susceptible_ids * (size_t)network_size);
    if (susceptible_ids == NULL) {
        fprintf(stderr, "malloc S_neighbor_array* error\n");
        free(infected_ids);
        exit(EXIT_FAILURE);
    }

    for (;;) {
        int infected_count = 0;
        int infected_left;
        int total_infected = 0;
        int spread_possible = 0;

        step++; /* infection time of nodes infected during this step */

        /* Snapshot of the nodes that are infected when the step begins. */
        for (i = 1; i <= network_size; i++)
            if (infected_sequences[i].S_or_I)
                infected_ids[infected_count++] = i;

        infected_left = infected_count;
        for (i = 0; i < infected_count; i++) {
            /* Draw a not-yet-visited spreader and drop it from the pool. */
            int pick = rand() % infected_left;
            int spreader = infected_ids[pick];
            int susceptible_count = 0;
            int susceptible_left;

            infected_ids[pick] = infected_ids[--infected_left];

            for (j = 1; j <= network_size; j++)
                if (adjacentMatrix[spreader][j] && !infected_sequences[j].S_or_I)
                    susceptible_ids[susceptible_count++] = j;

            if (susceptible_count == 0)
                continue;
            spread_possible = 1;

            susceptible_left = susceptible_count;
            for (j = 0; j < susceptible_count; j++) {
                /* Draw a not-yet-tried susceptible neighbour. */
                int choice = rand() % susceptible_left;
                int target = susceptible_ids[choice];
                double draw;

                susceptible_ids[choice] = susceptible_ids[--susceptible_left];

                draw = (double)(rand() % 1000) / 1000.0;
                if (draw < rateOfInfection) {
                    infected_sequences[target].S_or_I = 1;
                    infected_sequences[target].times = step;
                    infected_sequences[target].from = spreader;
                }
            }
        }

        for (i = 1; i <= network_size; i++)
            if (infected_sequences[i].S_or_I)
                total_infected++;

        /* Done when everyone is infected or nothing more can be reached. */
        if (total_infected == network_size || !spread_possible)
            break;
    }

    free(susceptible_ids);
    free(infected_ids);
}
/*
 * Export the simulation result as two CSV files for plotting with Gephi.
 *
 * nodeInfo.csv - header "id,label,color", then one row per node with its id
 *                (twice) and its infection time in the color column.
 * edgeInfo.csv - header "source,target", then every edge i-j (i < j) once.
 *                When i was infected by j the row is written as "j,i";
 *                otherwise (j infected by i, or no transmission over this
 *                edge) it is written as "i,j".
 *
 * Open, write and close failures are reported and end the program with a
 * failure status instead of silently leaving a truncated file behind.
 */
void saveSIResult()
{
    int i, j;
    int write_failed;
    FILE *fNodeInfo, *fEdgeInfo;

    if (NULL == (fNodeInfo = fopen("nodeInfo.csv", "w"))) {
        fprintf(stderr, "nodeInfo.csv open error");
        exit(EXIT_FAILURE);
    }
    if (NULL == (fEdgeInfo = fopen("edgeInfo.csv", "w"))) {
        fprintf(stderr, "edgeInfo.csv open error");
        fclose(fNodeInfo);
        exit(EXIT_FAILURE);
    }

    /* Nodes and their infection times. */
    fprintf(fNodeInfo, "id,label,color\n");
    for (i = 1; i <= network_size; i++)
        fprintf(fNodeInfo, "%d,%d,%d\n", i, i, infected_sequences[i].times);

    write_failed = ferror(fNodeInfo);
    if (fclose(fNodeInfo) != 0)
        write_failed = 1;
    if (write_failed) {
        fprintf(stderr, "nodeInfo.csv write error\n");
        fclose(fEdgeInfo);
        exit(EXIT_FAILURE);
    }

    fprintf(fEdgeInfo, "source,target\n");
    for (i = 1; i <= network_size; i++) {
        for (j = i + 1; j <= network_size; j++) {
            if (!adjacentMatrix[i][j])
                continue;
            if (infected_sequences[i].from == j)
                fprintf(fEdgeInfo, "%d,%d\n", j, i); /* j infected i */
            else
                fprintf(fEdgeInfo, "%d,%d\n", i, j); /* i infected j, or unused edge */
        }
    }

    write_failed = ferror(fEdgeInfo);
    if (fclose(fEdgeInfo) != 0)
        write_failed = 1;
    if (write_failed) {
        fprintf(stderr, "edgeInfo.csv write error\n");
        exit(EXIT_FAILURE);
    }
}
代码中使用了USAir网络(美国航空网络,数据下载地址:http://vlado.fmf.uni-lj.si/pub/networks/data/mix/USAir97.net)。原网络为含权网络,包含332个机场和2126条航线。
下图为使用gephi可视化该网络的结果。
运行以上代码,通过输出的csv文件,可以绘制出疾病传播的趋势,黄色的节点为传播源,深红到深蓝的渐变表示感染时间的增加。即越红的节点表示越早感染,越蓝的节点表示该节点越晚被感染。
如果只是保留有疾病传播的路径那么可以得到下图
上一篇: 在yarn中的运行wordcount
下一篇: 改善Python程序的建议总结
推荐阅读