Add a changelog
[pgp-tools.git] / keyanalyze / keyanalyze.c
1 /* keyanalyze.c
2 * Does some analysis of pre-monged pgp keyrings for some interesting data.
3 * Some code (c)2001 M. Drew Streib <dtype@dtype.org>
4 * Some code (c)2001 Thomas Roessler <roessler@does-not-exist.org>
5 * Some code (c)2001 Hal J. Burch <hburch@halport.lumeta.com>
6 * Some code (c)2001 Matt Kraai <kraai@alumni.carnegiemellon.edu>
7 * Some Code (c)2001 Steve Langasek <vorlon@netexpress.net>
8 *
9 * You are licenced to use this code under the terms of the GNU General
10 * Public License (GPL) version 2.
11 */
12
13 /* some configurables */
14 static char *infile = "preprocess.keys";
15 static char *outdir = "output/";
16 static int outsubdirs = 1; /* create output/12/12345678 or output/12345678 */
17 #define MAXKEYS 160000 /* MUST be > `grep p preprocess.keys | wc` */
18 #define MINSETSIZE 10 /* minimum set size we care about for strong sets */
19 #define MAXHOPS 30 /* max hop count we care about for report */
20
21 /* includes */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <unistd.h>
29 #include <pthread.h>
30
31 /* globals */
32 struct sig {
33 int id;
34 struct sig *next;
35 };
36 typedef struct sig sig;
37
38 struct threadparam {
39 int threadnum;
40 };
41 typedef struct threadparam threadparam;
42
43 struct keydata {
44 unsigned int id1;
45 unsigned int id2;
46 sig *to;
47 sig *from;
48 };
49
50 struct keydata keys[MAXKEYS];
51 FILE *fpin,*fpout,*fpstat,*fpsets,*fpmsd;
52 int numkeys = 0;
53 int numsigs = 0;
54 int component[MAXKEYS];
55 int max_component;
56 int max_size;
57 int reachable[MAXKEYS];
58 int num_reachable;
59 float meantotal;
60 pthread_mutex_t mean_l;
61
62 /* declarations */
63 void AddKey (unsigned char *newid);
64 void AddSig (int src, int dst);
65 void CloseFiles();
66 int CountSigs(sig *current);
67 unsigned int ConvertFromHex (const unsigned char *c);
68 int GetKeyById(const unsigned char* searchid);
69 void MeanCrawler(int *distset, int id, int len);
70 float MeanDistance(int id, int *hops, int *hophigh, sig **farthest);
71
72 /* ################################################################# */
73 /* helper functions, in alpha order */
74
75 void AddKey (unsigned char *newid) {
76 struct keydata *key = &keys[numkeys++];
77
78 /* assume no dupes for now */
79 key->id1 = ConvertFromHex(newid);
80 key->id2 = ConvertFromHex(newid+8);
81 }
82
83 void AddKeyToList(sig **pptr, int id)
84 {
85 while (*pptr)
86 pptr = &(*pptr)->next;
87
88 *pptr = (sig *) calloc (1,sizeof(sig));
89 (*pptr)->id = id;
90 }
91
92 void AddSig (int src, int dst) {
93 /* if GetKeyById returned -1, then we exit here */
94 if ((src == -1) || (dst == -1))
95 return;
96
97 AddKeyToList(&keys[dst].to, src);
98 AddKeyToList(&keys[src].from, dst);
99
100 numsigs++;
101 }
102
103 void CloseFiles() {
104 fclose(fpin);
105 fclose(fpout);
106 }
107
108 int CountSigs(sig *current) {
109 int ret = 0;
110
111 while (current->next) {
112 current = current->next;
113 ret++;
114 }
115
116 return ret;
117 }
118
119 unsigned int ConvertFromHex (const unsigned char *c) {
120 unsigned char buf1[5];
121 unsigned char buf2[5];
122 unsigned int ret;
123
124 buf1[4] = 0;
125 buf2[4] = 0;
126 memcpy (buf1,c,4);
127 memcpy (buf2,c+4,4);
128 ret = strtol(buf1,NULL,16)*65536 + strtol(buf2,NULL,16);
129 return ret;
130 }
131
132 void DeleteKeyList(sig **pptr)
133 {
134 sig *current = *pptr;
135
136 while (*pptr) {
137 current = (*pptr)->next;
138 free (*pptr);
139 *pptr = current;
140 }
141 }
142
143 /* recursive function to mark connected keys in the connected set */
144 int DFSMarkConnected (int *markset, int id) {
145 sig *psig;
146 int num = 1;
147 /* mark this node, call this function for all subnodes that aren't
148 * marked already */
149 markset[id] = 1;
150 for (psig = keys[id].from; psig; psig = psig->next) {
151 if (!markset[psig->id])
152 num += DFSMarkConnected (markset, psig->id);
153 }
154
155 return num;
156 }
157
158 int GetKeyById(const unsigned char* searchid) {
159 int i;
160 unsigned int s1,s2;
161
162 s1 = ConvertFromHex(searchid);
163 s2 = ConvertFromHex(searchid+8);
164 for (i = 0; i < numkeys; i++) {
165 struct keydata *key = &keys[i];
166 if ((s1 == key->id1) && (s2 == key->id2)) {
167 return i;
168 }
169 }
170 return (-1);
171 }
172
173 /* new _much_ faster BFS version of MeanCrawler() contributed by
174 * Hal J. Burch <hburch@halport.lumeta.com> */
175 void MeanCrawler(int *distset, int id, int len) {
176 sig *psig;
177 int queue[MAXKEYS];
178 int qhead, qtail;
179
180 memset(queue,0,sizeof(int)*MAXKEYS);
181 queue[0] = id;
182 distset[id] = 0;
183 qhead = 0;
184 qtail = 1;
185
186 while (qtail > qhead) {
187 id = queue[qhead++];
188 len = distset[id];
189 psig = keys[id].to;
190 while (psig) {
191 if ((len+1) < distset[psig->id]) {
192 distset[psig->id] = len+1;
193 queue[qtail++] = psig->id;
194 }
195 psig = psig->next;
196 }
197 }
198 }
199
200 float MeanDistance(int id, int *hops, int *hophigh, sig **farthest) {
201 int dist[MAXKEYS];
202 int i;
203 int totaldist = 0;
204
205 /* init to a large value here, so shortest distance will always be
206 * less */
207 memset(dist,100,sizeof(int)*MAXKEYS);
208
209 MeanCrawler(dist,id,0);
210
211 for (i=0;i<numkeys;i++) {
212 if (component[i] == max_component) {
213 totaldist += dist[i];
214 if (dist[i] < MAXHOPS) hops[dist[i]]++;
215 if (dist[i] > *hophigh) {
216 *hophigh = dist[i];
217 DeleteKeyList(farthest);
218 }
219 if (dist[i] == *hophigh) {
220 AddKeyToList(farthest, i);
221 }
222 }
223 }
224
225 if (*hophigh > MAXHOPS) *hophigh = MAXHOPS;
226
227 return ((float)totaldist / max_size);
228 }
229
230 FILE *OpenFileById(unsigned int id) {
231 char buf[255];
232 char idchr[9];
233
234 sprintf(idchr,"%08X",id);
235
236 /* first the directory */
237 buf[0] = '\0';
238 strcat(buf, outdir);
239 if (outsubdirs) {
240 strncat(buf,idchr,2);
241 mkdir(buf,(mode_t)493);
242 strcat(buf,"/");
243 }
244 strcat(buf,idchr);
245 return fopen(buf,"w");
246 }
247
248 /* ################################################################# */
249 /* program block functions, not predeclared */
250
251 int OpenFiles() {
252 char buf[255];
253
254 fpin = fopen(infile, "r");
255 if (!fpin) return 1;
256
257 /* create output dir if necessary. this will just fail if it exists */
258 mkdir(outdir, (mode_t)493);
259
260 /* status file */
261 buf[0] = '\0';
262 strcat(buf, outdir);
263 strcat(buf,"status.txt");
264 fpstat = fopen(buf,"w");
265 if (!fpstat) return 1;
266
267 /* msd output file */
268 buf[0] = '\0';
269 strcat(buf, outdir);
270 strcat(buf,"msd.txt");
271 fpmsd = fopen(buf,"w");
272 if (!fpmsd) return 1;
273
274 /* othersets output file */
275 buf[0] = '\0';
276 strcat(buf, outdir);
277 strcat(buf,"othersets.txt");
278 fpsets = fopen(buf,"w");
279 if (!fpsets) return 1;
280
281 /* other output file */
282 buf[0] = '\0';
283 strcat(buf, outdir);
284 strcat(buf,"other.txt");
285 fpout = fopen(buf,"w");
286 if (!fpout) return 1;
287
288 return 0;
289 }
290
291 void ParseArgs(int argc, char **argv)
292 {
293 int outdirlen;
294
295 while (1) {
296 int option = getopt(argc, argv, "hi:o:1");
297 if (option == -1)
298 break;
299 switch (option) {
300 case 'h':
301 printf ("Usage: %s [-h1] [-i infile] [-o outdir]\n", argv[0]);
302 exit (0);
303 break;
304 case 'i':
305 infile = optarg;
306 break;
307 case 'o':
308 outdir = optarg;
309 outdirlen = strlen(outdir);
310 if (outdir[outdirlen - 1] != '/') {
311 outdir = malloc(outdirlen + 2);
312 memcpy(outdir, optarg, outdirlen);
313 outdir[outdirlen] = '/';
314 outdir[outdirlen + 1] = '\0';
315 }
316 break;
317 case '1':
318 outsubdirs = 0;
319 break;
320 }
321 }
322
323 if (optind < argc) {
324 /* Assume it's infile */
325 infile = argv[optind];
326 }
327 }
328
329 int PrintKeyList(FILE *f, sig *s)
330 {
331 int i = 0;
332 while (s) {
333 struct keydata *key = &keys[s->id];
334 fprintf(f, " %08X %08X\n", key->id1, key->id2);
335 s = s->next;
336 i++;
337 }
338 return i;
339 }
340
341 void ReadInput() {
342 unsigned char buf[20];
343 int currentkey = -1;
344
345 fprintf(fpstat,"Importing pass 1 (keys)...\n");
346 while (fread(buf,1,18,fpin) == 18) {
347 if (buf[17] != '\n') continue;
348 if (buf[0] == 'p') {
349 AddKey(buf+1);
350 }
351 }
352 fprintf(fpstat,"done.\n");
353 fprintf(fpstat,"%d keys imported\n",numkeys);
354
355 rewind(fpin);
356 fprintf(fpstat,"Importing pass 2 (sigs)...\n");
357 while (fread(buf,1,18,fpin) == 18) {
358 if (buf[17] != '\n') continue;
359 if (buf[0] == 'p') {
360 currentkey = GetKeyById(buf+1);
361 if (currentkey == -1) {
362 fprintf(fpstat,"Error finding key in pass 2.\n");
363 exit(EXIT_FAILURE);
364 }
365 }
366 if (buf[0] == 's') {
367 AddSig(GetKeyById(buf+1),currentkey);
368 if ((numsigs%1000) == 0) {
369 fprintf(fpstat,"%d sigs imported...\n",numsigs);
370 fflush(fpstat);
371 }
372 }
373 }
374 fprintf(fpstat,"done.\n");
375 fprintf(fpstat,"%d sigs imported\n",numsigs);
376 }
377
378 /* This is intended for later use. As it takes a lot of time for the
379 * signature imports, this will save time for future runs of the program
380 * with the same data set. */
381
382 void SaveState() {
383 /* not yet implemented. need to figure out how to best handle the
384 * linked lists of sigs first */
385 }
386
387 int dfsnum[MAXKEYS];
388 int lownum[MAXKEYS];
389 int removed[MAXKEYS];
390 int stack[MAXKEYS];
391 int stackindex;
392 int lastdfsnum;
393
394 void DFSVisit(int id) {
395 sig *psig;
396
397 dfsnum[id] = lownum[id] = ++lastdfsnum;
398 stack[stackindex++] = id;
399
400 for (psig = keys[id].to; psig; psig = psig->next) {
401 int neighbor = psig->id;
402
403 if (removed[neighbor])
404 continue;
405
406 if (!dfsnum[neighbor]) {
407 DFSVisit (neighbor);
408
409 if (lownum[neighbor] < lownum[id])
410 lownum[id] = lownum[neighbor];
411 } else if (dfsnum[neighbor] < lownum[id])
412 lownum[id] = dfsnum[neighbor];
413 }
414
415 if (lownum[id] == dfsnum[id]) {
416 int i, size = 0;
417
418 do {
419 struct keydata *key;
420 i = stack[--stackindex];
421 key = &keys[i];
422 component[i] = id;
423 removed[i] = 1;
424 size++;
425 fprintf(fpsets, "%08X %08X\n", key->id1, key->id2);
426 } while (i != id);
427
428 fprintf(fpsets,
429 "*** %d keys in this strongly connected set\n\n", size);
430
431 if (max_size < size) {
432 max_size = size;
433 max_component = id;
434 }
435 }
436 }
437
438 void TestConnectivity() {
439 int i;
440
441 for (i = 0; i < numkeys; i++)
442 if (!dfsnum[i])
443 DFSVisit (i);
444
445 num_reachable = DFSMarkConnected (reachable, max_component);
446
447 fprintf(fpstat,"reachable set is size %d\n", num_reachable);
448 fprintf(fpstat,"strongly connected set is size %d\n", max_size);
449 }
450
451 /* ################################################################# */
452 /* report functions, sort of top level */
453
454 void IndivReport(FILE *fp,int key) {
455 int totalsigsto, totalsigsfrom;
456
457 /* head of report */
458 fprintf(fp,"KeyID %08X %08X\n\n", keys[key].id1, keys[key].id2);
459
460 fprintf(fp,"This individual key report was generated as part of the monthly keyanalyze\n");
461 fprintf(fp,"report at http://dtype.org/keyanalyze/.\n\n");
462
463 fprintf(fp,"Note: Key signature counts and lists are from a pruned list that only\n");
464 fprintf(fp,"includes keys with signatures other than their own.\n\n");
465
466 fprintf(fp,"Signatures to this key:\n");
467 totalsigsto = PrintKeyList(fp, keys[key].to);
468 fprintf(fp,"Total: %d signatures to this id from this set\n\n",totalsigsto);
469
470 fprintf(fp,"Signatures from this key:\n");
471 totalsigsfrom = PrintKeyList(fp, keys[key].from);
472 fprintf(fp,"Total: %d signatures from this id to this set\n\n",totalsigsfrom);
473 }
474
475 /* ################################################################# */
476 /* thread routine */
477
478 void *thread_slave(void *arg) {
479 int i,j;
480 float threadmean;
481 sig *distant_sigs = NULL;
482 FILE *fpindiv;
483
484 int hops[MAXHOPS]; /* array for hop histogram */
485 int hophigh; /* highest number of hops for this key */
486
487 threadparam data = *(threadparam *)arg;
488
489 for (i=0;i<numkeys;i++) {
490 struct keydata *key = &keys[i];
491 /* do this for all set2 now */
492 if (reachable[i] && ((i%2)==data.threadnum)) {
493 /* zero out hop histogram */
494 memset(hops,0,sizeof(int)*MAXHOPS);
495 hophigh = 0;
496
497 threadmean = MeanDistance(i,hops,&hophigh,&distant_sigs);
498
499 pthread_mutex_lock(&mean_l);
500 meantotal += threadmean;
501 fprintf(fpmsd,"%08X %08X %8.4f\n"
502 ,key->id1, key->id2, threadmean);
503 fflush(fpmsd);
504 pthread_mutex_unlock(&mean_l);
505
506 /* individual report */
507 fpindiv = OpenFileById(key->id2);
508 IndivReport(fpindiv,i);
509 fprintf(fpindiv, "This key is %sin the strong set.\n",
510 component[i] == max_component ? "" : "not ");
511 fprintf(fpindiv,"Mean distance to this key from strong set: %8.4f\n\n",threadmean);
512 fprintf(fpindiv,"Breakout by hop count (only from strong set):\n");
513 for (j=0;j<=hophigh;j++) {
514 fprintf(fpindiv,"%2d hops: %5d\n",j,hops[j]);
515 }
516 if (distant_sigs) {
517 fprintf(fpindiv,"\nFarthest keys (%d hops):\n", j-1);
518 PrintKeyList(fpindiv, distant_sigs);
519 DeleteKeyList(&distant_sigs);
520 }
521 fclose(fpindiv);
522 }
523 }
524 return NULL;
525 }
526
527 /* ################################################################# */
528 /* main() */
529
530 int main(int argc, char **argv)
531 {
532 pthread_t *slave0,*slave1;
533 threadparam arg0,arg1;
534 void *retval;
535
536 ParseArgs(argc, argv);
537 if (OpenFiles()) {
538 fprintf(stderr, "Error opening files.\n");
539 exit(EXIT_FAILURE);
540 }
541 ReadInput();
542 TestConnectivity();
543
544 pthread_mutex_init(&mean_l,NULL);
545 slave0 = (pthread_t *) calloc(1, sizeof(pthread_t));
546 slave1 = (pthread_t *) calloc(1, sizeof(pthread_t));
547 arg0.threadnum = 0;
548 arg1.threadnum = 1;
549
550 if (pthread_create(slave0,NULL,thread_slave,&arg0)) {
551 fprintf(stderr,"Cannot create thread 0.");
552 }
553 if (pthread_create(slave1,NULL,thread_slave,&arg1)) {
554 fprintf(stderr,"Cannot create thread 1.");
555 }
556 pthread_join(*slave0, &retval);
557 pthread_join(*slave1, &retval);
558
559 fprintf(fpout,"Average mean is %9.4f\n",meantotal/num_reachable);
560 /* ReportMostSignatures(); */
561 CloseFiles();
562 return 0;
563 }