* Import keyanalyze into signing-party. Thanks to Matthew Wilcox for the
[pgp-tools.git] / keyanalyze / keyanalyze.c
1 /* keyanalyze.c
2 * Does some analysis of pre-monged pgp keyrings for some interesting data.
3 * Some code (c)2001 M. Drew Streib <dtype@dtype.org>
4 * Some code (c)2001 Thomas Roessler <roessler@does-not-exist.org>
5 * Some code (c)2001 Hal J. Burch <hburch@halport.lumeta.com>
6 * Some code (c)2001 Matt Kraai <kraai@alumni.carnegiemellon.edu>
7 * Some Code (c)2001 Steve Langasek <vorlon@netexpress.net>
8 *
9 * You are licenced to use this code under the terms of the GNU General
10 * Public License (GPL) version 2.
11 */
12
13 /* some configurables */
14 static char *infile = "preprocess.keys";
15 static char *outdir = "output/";
16 static int outsubdirs = 1; /* create output/12/12345678 or output/12345678 */
17 #define MAXKEYS 160000 /* MUST be > `grep p preprocess.keys | wc` */
18 #define MINSETSIZE 10 /* minimum set size we care about for strong sets */
19 #define MAXHOPS 30 /* max hop count we care about for report */
20
21 /* includes */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <unistd.h>
29 #include <pthread.h>
30
31 /* globals */
32 struct sig {
33 int id;
34 struct sig *next;
35 };
36 typedef struct sig sig;
37
38 struct threadparam {
39 int threadnum;
40 };
41 typedef struct threadparam threadparam;
42
43 struct keydata {
44 unsigned int id1;
45 unsigned int id2;
46 sig *to;
47 sig *from;
48 };
49
50 struct keydata keys[MAXKEYS];
51 FILE *fpin,*fpout,*fpstat,*fpsets,*fpmsd;
52 int numkeys = 0;
53 int numsigs = 0;
54 int component[MAXKEYS];
55 int max_component;
56 int max_size;
57 int reachable[MAXKEYS];
58 int num_reachable;
59 float meantotal;
60 pthread_mutex_t mean_l;
61
62 /* declarations */
63 void AddKey (unsigned char *newid);
64 void AddSig (int src, int dst);
65 void CloseFiles();
66 int CountSigs(sig *current);
67 unsigned int ConvertFromHex (const unsigned char *c);
68 int GetKeyById(const unsigned char* searchid);
69 void MeanCrawler(int *distset, int id, int len);
70 float MeanDistance(int id, int *hops, int *hophigh, sig **farthest);
71
72 /* ################################################################# */
73 /* helper functions, in alpha order */
74
75 void AddKey (unsigned char *newid) {
76 struct keydata *key = &keys[numkeys++];
77
78 /* assume no dupes for now */
79 key->id1 = ConvertFromHex(newid);
80 key->id2 = ConvertFromHex(newid+8);
81 }
82
83 void AddKeyToList(sig **pptr, int id)
84 {
85 while (*pptr)
86 pptr = &(*pptr)->next;
87
88 *pptr = (sig *) calloc (1,sizeof(sig));
89 (*pptr)->id = id;
90 }
91
92 void AddSig (int src, int dst) {
93 /* if GetKeyById returned -1, then we exit here */
94 if ((src == -1) || (dst == -1))
95 return;
96
97 AddKeyToList(&keys[dst].to, src);
98 AddKeyToList(&keys[src].from, dst);
99
100 numsigs++;
101 }
102
103 void CloseFiles() {
104 fclose(fpin);
105 fclose(fpout);
106 }
107
108 int CountSigs(sig *current) {
109 int ret = 0;
110
111 while (current->next) {
112 current = current->next;
113 ret++;
114 }
115
116 return ret;
117 }
118
119 unsigned int ConvertFromHex (const unsigned char *c) {
120 unsigned char buf1[5];
121 unsigned char buf2[5];
122 unsigned int ret;
123
124 buf1[4] = 0;
125 buf2[4] = 0;
126 memcpy (buf1,c,4);
127 memcpy (buf2,c+4,4);
128 ret = strtol(buf1,NULL,16)*65536 + strtol(buf2,NULL,16);
129 return ret;
130 }
131
132 void DeleteKeyList(sig **pptr)
133 {
134 sig *current = *pptr;
135
136 while (*pptr) {
137 current = (*pptr)->next;
138 free (*pptr);
139 *pptr = current;
140 }
141 }
142
143 /* recursive function to mark connected keys in the connected set */
144 int DFSMarkConnected (int *markset, int id) {
145 sig *psig;
146 int num = 1;
147 /* mark this node, call this function for all subnodes that aren't
148 * marked already */
149 markset[id] = 1;
150 for (psig = keys[id].from; psig; psig = psig->next) {
151 if (!markset[psig->id])
152 num += DFSMarkConnected (markset, psig->id);
153 }
154
155 return num;
156 }
157
158 int GetKeyById(const unsigned char* searchid) {
159 int i;
160 unsigned int s1,s2;
161
162 s1 = ConvertFromHex(searchid);
163 s2 = ConvertFromHex(searchid+8);
164 for (i = 0; i < numkeys; i++) {
165 struct keydata *key = &keys[i];
166 if ((s1 == key->id1) && (s2 == key->id2)) {
167 return i;
168 }
169 }
170 return (-1);
171 }
172
173 /* new _much_ faster BFS version of MeanCrawler() contributed by
174 * Hal J. Burch <hburch@halport.lumeta.com> */
175 void MeanCrawler(int *distset, int id, int len) {
176 sig *psig;
177 int queue[MAXKEYS];
178 int qhead, qtail;
179
180 memset(queue,0,sizeof(int)*MAXKEYS);
181 queue[0] = id;
182 distset[id] = 0;
183 qhead = 0;
184 qtail = 1;
185
186 while (qtail > qhead) {
187 id = queue[qhead++];
188 len = distset[id];
189 psig = keys[id].to;
190 while (psig) {
191 if ((len+1) < distset[psig->id]) {
192 distset[psig->id] = len+1;
193 queue[qtail++] = psig->id;
194 }
195 psig = psig->next;
196 }
197 }
198 }
199
200 float MeanDistance(int id, int *hops, int *hophigh, sig **farthest) {
201 int dist[MAXKEYS];
202 int i;
203 int totaldist = 0;
204
205 /* init to a large value here, so shortest distance will always be
206 * less */
207 memset(dist,100,sizeof(int)*MAXKEYS);
208
209 MeanCrawler(dist,id,0);
210
211 for (i=0;i<numkeys;i++) {
212 if (component[i] == max_component) {
213 totaldist += dist[i];
214 if (dist[i] < MAXHOPS) hops[dist[i]]++;
215 if (dist[i] > *hophigh) {
216 *hophigh = dist[i];
217 DeleteKeyList(farthest);
218 }
219 if (dist[i] == *hophigh) {
220 AddKeyToList(farthest, i);
221 }
222 }
223 }
224
225 if (*hophigh > MAXHOPS) *hophigh = MAXHOPS;
226
227 return ((float)totaldist / max_size);
228 }
229
230 FILE *OpenFileById(unsigned int id) {
231 char buf[255];
232 char idchr[9];
233
234 sprintf(idchr,"%08X",id);
235
236 /* first the directory */
237 buf[0] = '\0';
238 strcat(buf, outdir);
239 if (outsubdirs) {
240 strncat(buf,idchr,2);
241 mkdir(buf,(mode_t)493);
242 strcat(buf,"/");
243 }
244 strcat(buf,idchr);
245 return fopen(buf,"w");
246 }
247
248 /* ################################################################# */
249 /* program block functions, not predeclared */
250
251 int OpenFiles() {
252 char buf[255];
253
254 fpin = fopen(infile, "r");
255 if (!fpin) return 1;
256
257 /* create output dir if necessary. this will just fail if it exists */
258 mkdir(outdir, (mode_t)493);
259
260 /* status file */
261 buf[0] = '\0';
262 strcat(buf, outdir);
263 strcat(buf,"status.txt");
264 fpstat = fopen(buf,"w");
265 if (!fpstat) return 1;
266
267 /* msd output file */
268 buf[0] = '\0';
269 strcat(buf, outdir);
270 strcat(buf,"msd.txt");
271 fpmsd = fopen(buf,"w");
272 if (!fpmsd) return 1;
273
274 /* othersets output file */
275 buf[0] = '\0';
276 strcat(buf, outdir);
277 strcat(buf,"othersets.txt");
278 fpsets = fopen(buf,"w");
279 if (!fpsets) return 1;
280
281 /* other output file */
282 buf[0] = '\0';
283 strcat(buf, outdir);
284 strcat(buf,"other.txt");
285 fpout = fopen(buf,"w");
286 if (!fpout) return 1;
287
288 return 0;
289 }
290
291 void ParseArgs(int argc, char **argv)
292 {
293 int outdirlen;
294
295 while (1) {
296 int option = getopt(argc, argv, "i:o:1");
297 if (option == -1)
298 break;
299 switch (option) {
300 case 'i':
301 infile = optarg;
302 break;
303 case 'o':
304 outdir = optarg;
305 outdirlen = strlen(outdir);
306 if (outdir[outdirlen - 1] != '/') {
307 outdir = malloc(outdirlen + 2);
308 memcpy(outdir, optarg, outdirlen);
309 outdir[outdirlen] = '/';
310 outdir[outdirlen + 1] = '\0';
311 }
312 break;
313 case '1':
314 outsubdirs = 0;
315 break;
316 }
317 }
318
319 if (optind < argc) {
320 /* Assume it's infile */
321 infile = argv[optind];
322 }
323 }
324
325 int PrintKeyList(FILE *f, sig *s)
326 {
327 int i = 0;
328 while (s) {
329 struct keydata *key = &keys[s->id];
330 fprintf(f, " %08X %08X\n", key->id1, key->id2);
331 s = s->next;
332 i++;
333 }
334 return i;
335 }
336
337 void ReadInput() {
338 unsigned char buf[20];
339 int currentkey = -1;
340
341 fprintf(fpstat,"Importing pass 1 (keys)...\n");
342 while (fread(buf,1,18,fpin) == 18) {
343 if (buf[17] != '\n') continue;
344 if (buf[0] == 'p') {
345 AddKey(buf+1);
346 }
347 }
348 fprintf(fpstat,"done.\n");
349 fprintf(fpstat,"%d keys imported\n",numkeys);
350
351 rewind(fpin);
352 fprintf(fpstat,"Importing pass 2 (sigs)...\n");
353 while (fread(buf,1,18,fpin) == 18) {
354 if (buf[17] != '\n') continue;
355 if (buf[0] == 'p') {
356 currentkey = GetKeyById(buf+1);
357 if (currentkey == -1) {
358 fprintf(fpstat,"Error finding key in pass 2.\n");
359 exit(EXIT_FAILURE);
360 }
361 }
362 if (buf[0] == 's') {
363 AddSig(GetKeyById(buf+1),currentkey);
364 if ((numsigs%1000) == 0) {
365 fprintf(fpstat,"%d sigs imported...\n",numsigs);
366 fflush(fpstat);
367 }
368 }
369 }
370 fprintf(fpstat,"done.\n");
371 fprintf(fpstat,"%d sigs imported\n",numsigs);
372 }
373
374 /* This is intended for later use. As it takes a lot of time for the
375 * signature imports, this will save time for future runs of the program
376 * with the same data set. */
377
378 void SaveState() {
379 /* not yet implemented. need to figure out how to best handle the
380 * linked lists of sigs first */
381 }
382
383 int dfsnum[MAXKEYS];
384 int lownum[MAXKEYS];
385 int removed[MAXKEYS];
386 int stack[MAXKEYS];
387 int stackindex;
388 int lastdfsnum;
389
390 void DFSVisit(int id) {
391 sig *psig;
392
393 dfsnum[id] = lownum[id] = ++lastdfsnum;
394 stack[stackindex++] = id;
395
396 for (psig = keys[id].to; psig; psig = psig->next) {
397 int neighbor = psig->id;
398
399 if (removed[neighbor])
400 continue;
401
402 if (!dfsnum[neighbor]) {
403 DFSVisit (neighbor);
404
405 if (lownum[neighbor] < lownum[id])
406 lownum[id] = lownum[neighbor];
407 } else if (dfsnum[neighbor] < lownum[id])
408 lownum[id] = dfsnum[neighbor];
409 }
410
411 if (lownum[id] == dfsnum[id]) {
412 int i, size = 0;
413
414 do {
415 struct keydata *key;
416 i = stack[--stackindex];
417 key = &keys[i];
418 component[i] = id;
419 removed[i] = 1;
420 size++;
421 fprintf(fpsets, "%08X %08X\n", key->id1, key->id2);
422 } while (i != id);
423
424 fprintf(fpsets,
425 "*** %d keys in this strongly connected set\n\n", size);
426
427 if (max_size < size) {
428 max_size = size;
429 max_component = id;
430 }
431 }
432 }
433
434 void TestConnectivity() {
435 int i;
436
437 for (i = 0; i < numkeys; i++)
438 if (!dfsnum[i])
439 DFSVisit (i);
440
441 num_reachable = DFSMarkConnected (reachable, max_component);
442
443 fprintf(fpstat,"reachable set is size %d\n", num_reachable);
444 fprintf(fpstat,"strongly connected set is size %d\n", max_size);
445 }
446
447 /* ################################################################# */
448 /* report functions, sort of top level */
449
450 void IndivReport(FILE *fp,int key) {
451 int totalsigsto, totalsigsfrom;
452
453 /* head of report */
454 fprintf(fp,"KeyID %08X %08X\n\n", keys[key].id1, keys[key].id2);
455
456 fprintf(fp,"This individual key report was generated as part of the monthly keyanalyze\n");
457 fprintf(fp,"report at http://dtype.org/keyanalyze/.\n\n");
458
459 fprintf(fp,"Note: Key signature counts and lists are from a pruned list that only\n");
460 fprintf(fp,"includes keys with signatures other than their own.\n\n");
461
462 fprintf(fp,"Signatures to this key:\n");
463 totalsigsto = PrintKeyList(fp, keys[key].to);
464 fprintf(fp,"Total: %d signatures to this id from this set\n\n",totalsigsto);
465
466 fprintf(fp,"Signatures from this key:\n");
467 totalsigsfrom = PrintKeyList(fp, keys[key].from);
468 fprintf(fp,"Total: %d signatures from this id to this set\n\n",totalsigsfrom);
469 }
470
471 /* ################################################################# */
472 /* thread routine */
473
474 void *thread_slave(void *arg) {
475 int i,j;
476 float threadmean;
477 sig *distant_sigs = NULL;
478 FILE *fpindiv;
479
480 int hops[MAXHOPS]; /* array for hop histogram */
481 int hophigh; /* highest number of hops for this key */
482
483 threadparam data = *(threadparam *)arg;
484
485 for (i=0;i<numkeys;i++) {
486 struct keydata *key = &keys[i];
487 /* do this for all set2 now */
488 if (reachable[i] && ((i%2)==data.threadnum)) {
489 /* zero out hop histogram */
490 memset(hops,0,sizeof(int)*MAXHOPS);
491 hophigh = 0;
492
493 threadmean = MeanDistance(i,hops,&hophigh,&distant_sigs);
494
495 pthread_mutex_lock(&mean_l);
496 meantotal += threadmean;
497 fprintf(fpmsd,"%08X %08X %8.4f\n"
498 ,key->id1, key->id2, threadmean);
499 fflush(fpmsd);
500 pthread_mutex_unlock(&mean_l);
501
502 /* individual report */
503 fpindiv = OpenFileById(key->id2);
504 IndivReport(fpindiv,i);
505 fprintf(fpindiv, "This key is %sin the strong set.\n",
506 component[i] == max_component ? "" : "not ");
507 fprintf(fpindiv,"Mean distance to this key from strong set: %8.4f\n\n",threadmean);
508 fprintf(fpindiv,"Breakout by hop count (only from strong set):\n");
509 for (j=0;j<=hophigh;j++) {
510 fprintf(fpindiv,"%2d hops: %5d\n",j,hops[j]);
511 }
512 if (distant_sigs) {
513 fprintf(fpindiv,"\nFarthest keys (%d hops):\n", j-1);
514 PrintKeyList(fpindiv, distant_sigs);
515 DeleteKeyList(&distant_sigs);
516 }
517 fclose(fpindiv);
518 }
519 }
520 return NULL;
521 }
522
523 /* ################################################################# */
524 /* main() */
525
526 int main(int argc, char **argv)
527 {
528 pthread_t *slave0,*slave1;
529 threadparam arg0,arg1;
530 void *retval;
531
532 ParseArgs(argc, argv);
533 if (OpenFiles()) {
534 fprintf(stderr, "Error opening files.\n");
535 exit(EXIT_FAILURE);
536 }
537 ReadInput();
538 TestConnectivity();
539
540 pthread_mutex_init(&mean_l,NULL);
541 slave0 = (pthread_t *) calloc(1, sizeof(pthread_t));
542 slave1 = (pthread_t *) calloc(1, sizeof(pthread_t));
543 arg0.threadnum = 0;
544 arg1.threadnum = 1;
545
546 if (pthread_create(slave0,NULL,thread_slave,&arg0)) {
547 fprintf(stderr,"Cannot create thread 0.");
548 }
549 if (pthread_create(slave1,NULL,thread_slave,&arg1)) {
550 fprintf(stderr,"Cannot create thread 1.");
551 }
552 pthread_join(*slave0, &retval);
553 pthread_join(*slave1, &retval);
554
555 fprintf(fpout,"Average mean is %9.4f\n",meantotal/num_reachable);
556 /* ReportMostSignatures(); */
557 CloseFiles();
558 return 0;
559 }