* Import keyanalyze into signing-party. Thanks to Matthew Wilcox for the
[pgp-tools.git] / keyanalyze / keyanalyze.c
diff --git a/keyanalyze/keyanalyze.c b/keyanalyze/keyanalyze.c
new file mode 100644 (file)
index 0000000..91d19a5
--- /dev/null
@@ -0,0 +1,559 @@
+/* keyanalyze.c
+ * Does some analysis of pre-monged pgp keyrings for some interesting data.
+ * Some code (c)2001 M. Drew Streib <dtype@dtype.org>
+ * Some code (c)2001 Thomas Roessler <roessler@does-not-exist.org>
+ * Some code (c)2001 Hal J. Burch <hburch@halport.lumeta.com>
+ * Some code (c)2001 Matt Kraai <kraai@alumni.carnegiemellon.edu>
+ * Some Code (c)2001 Steve Langasek <vorlon@netexpress.net>
+ *
+ * You are licenced to use this code under the terms of the GNU General
+ * Public License (GPL) version 2.
+ */
+
+/* some configurables */
+static char *infile = "preprocess.keys";
+static char *outdir = "output/";
+static int outsubdirs = 1; /* create output/12/12345678 or output/12345678 */
+#define MAXKEYS        160000 /* MUST be > `grep p preprocess.keys | wc` */
+#define MINSETSIZE     10 /* minimum set size we care about for strong sets */
+#define MAXHOPS                30 /* max hop count we care about for report */
+
+/* includes */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+
+/* globals */
+struct sig {
+       int id;
+       struct sig *next;
+};
+typedef struct sig sig;
+
+struct threadparam {
+       int threadnum;
+};
+typedef struct threadparam threadparam;
+
+struct keydata {
+       unsigned int id1;
+       unsigned int id2;
+       sig *to;
+       sig *from;
+};
+
+struct keydata         keys[MAXKEYS];
+FILE                   *fpin,*fpout,*fpstat,*fpsets,*fpmsd;
+int                    numkeys = 0;
+int                    numsigs = 0;
+int                    component[MAXKEYS];
+int                    max_component;
+int                    max_size;
+int                    reachable[MAXKEYS];
+int                    num_reachable;
+float                  meantotal;
+pthread_mutex_t mean_l;
+
+/* declarations */
+void AddKey (unsigned char *newid);
+void AddSig (int src, int dst);
+void CloseFiles();
+int CountSigs(sig *current);
+unsigned int ConvertFromHex (const unsigned char *c);
+int GetKeyById(const unsigned char* searchid);
+void MeanCrawler(int *distset, int id, int len);
+float MeanDistance(int id, int *hops, int *hophigh, sig **farthest);
+
+/* ################################################################# */
+/* helper functions, in alpha order */
+
+void AddKey (unsigned char *newid) {
+       struct keydata *key = &keys[numkeys++];
+
+       /* assume no dupes for now */
+       key->id1 = ConvertFromHex(newid);
+       key->id2 = ConvertFromHex(newid+8);
+}
+
+void AddKeyToList(sig **pptr, int id)
+{
+       while (*pptr)
+               pptr = &(*pptr)->next;
+
+       *pptr = (sig *) calloc (1,sizeof(sig));
+       (*pptr)->id = id;
+}
+
+void AddSig (int src, int dst) {
+       /* if GetKeyById returned -1, then we exit here */
+       if ((src == -1) || (dst == -1))
+               return;
+
+       AddKeyToList(&keys[dst].to, src);
+       AddKeyToList(&keys[src].from, dst);
+       
+       numsigs++;
+}
+
+void CloseFiles() {
+       fclose(fpin);
+       fclose(fpout);
+}
+
+int CountSigs(sig *current) {
+       int ret = 0;
+
+       while (current->next) {
+               current = current->next;
+               ret++;
+       }
+
+       return ret;
+}
+
+unsigned int ConvertFromHex (const unsigned char *c) {
+       unsigned char buf1[5];
+       unsigned char buf2[5];
+       unsigned int ret;
+
+       buf1[4] = 0;
+       buf2[4] = 0;
+       memcpy (buf1,c,4);
+       memcpy (buf2,c+4,4);
+       ret = strtol(buf1,NULL,16)*65536 + strtol(buf2,NULL,16);
+       return ret;
+}
+
+void DeleteKeyList(sig **pptr)
+{
+       sig *current = *pptr;
+
+       while (*pptr) {
+               current = (*pptr)->next;
+               free (*pptr);
+               *pptr = current;
+       }
+}
+
+/* recursive function to mark connected keys in the connected set */
+int DFSMarkConnected (int *markset, int id) {
+       sig *psig;
+       int num = 1;
+       /* mark this node, call this function for all subnodes that aren't
+        * marked already */
+       markset[id] = 1;
+       for (psig = keys[id].from; psig; psig = psig->next) {
+               if (!markset[psig->id])
+                       num += DFSMarkConnected (markset, psig->id);
+       }
+
+       return num;
+}
+
+int GetKeyById(const unsigned char* searchid) {
+       int i;
+       unsigned int s1,s2;
+
+       s1 = ConvertFromHex(searchid);
+       s2 = ConvertFromHex(searchid+8);
+       for (i = 0; i < numkeys; i++) {
+               struct keydata *key = &keys[i];
+               if ((s1 == key->id1) && (s2 == key->id2)) {
+                       return i;
+               }
+       }
+       return (-1);
+}
+
+/* new _much_ faster BFS version of MeanCrawler() contributed by
+ * Hal J. Burch <hburch@halport.lumeta.com> */
+void MeanCrawler(int *distset, int id, int len) {
+       sig *psig;
+       int queue[MAXKEYS];
+       int qhead, qtail;
+
+       memset(queue,0,sizeof(int)*MAXKEYS);
+       queue[0] = id;
+       distset[id] = 0;
+       qhead = 0;
+       qtail = 1;
+
+       while (qtail > qhead) {
+               id = queue[qhead++];
+               len = distset[id];
+               psig = keys[id].to;
+               while (psig) {
+                       if ((len+1) < distset[psig->id]) {
+                               distset[psig->id] = len+1;
+                               queue[qtail++] = psig->id;
+                       }
+                       psig = psig->next;
+               }
+       }
+} 
+
+float MeanDistance(int id, int *hops, int *hophigh, sig **farthest) {
+       int dist[MAXKEYS];
+       int i;
+       int totaldist = 0;
+
+       /* init to a large value here, so shortest distance will always be
+        * less */
+       memset(dist,100,sizeof(int)*MAXKEYS);
+
+       MeanCrawler(dist,id,0);
+
+       for (i=0;i<numkeys;i++) {
+               if (component[i] == max_component) {
+                       totaldist += dist[i];
+                       if (dist[i] < MAXHOPS) hops[dist[i]]++;
+                       if (dist[i] > *hophigh) {
+                               *hophigh = dist[i];
+                               DeleteKeyList(farthest);
+                       }
+                       if (dist[i] == *hophigh) {
+                               AddKeyToList(farthest, i);
+                       }
+               }
+       }
+
+       if (*hophigh > MAXHOPS) *hophigh = MAXHOPS;
+
+       return ((float)totaldist / max_size);
+}
+
+FILE *OpenFileById(unsigned int id) {
+       char buf[255];
+       char idchr[9];
+
+       sprintf(idchr,"%08X",id);
+       
+       /* first the directory */
+       buf[0] = '\0';
+       strcat(buf, outdir);
+       if (outsubdirs) {
+               strncat(buf,idchr,2);
+               mkdir(buf,(mode_t)493);
+               strcat(buf,"/");
+       }
+       strcat(buf,idchr);
+       return fopen(buf,"w");
+}
+
+/* ################################################################# */
+/* program block functions, not predeclared */
+
+int OpenFiles() {
+       char buf[255];
+
+       fpin = fopen(infile, "r");
+       if (!fpin) return 1;
+
+       /* create output dir if necessary. this will just fail if it exists */
+       mkdir(outdir, (mode_t)493);
+
+       /* status file */
+       buf[0] = '\0';
+       strcat(buf, outdir);
+       strcat(buf,"status.txt"); 
+       fpstat = fopen(buf,"w");
+       if (!fpstat) return 1;
+
+       /* msd output file */
+       buf[0] = '\0';
+       strcat(buf, outdir);
+       strcat(buf,"msd.txt"); 
+       fpmsd = fopen(buf,"w");
+       if (!fpmsd) return 1;
+
+       /* othersets output file */
+       buf[0] = '\0';
+       strcat(buf, outdir);
+       strcat(buf,"othersets.txt"); 
+       fpsets = fopen(buf,"w");
+       if (!fpsets) return 1;
+
+       /* other output file */
+       buf[0] = '\0';
+       strcat(buf, outdir);
+       strcat(buf,"other.txt"); 
+       fpout = fopen(buf,"w");
+       if (!fpout) return 1;
+
+       return 0;
+}
+
+void ParseArgs(int argc, char **argv)
+{
+       int outdirlen;
+
+       while (1) {
+               int option = getopt(argc, argv, "i:o:1");
+               if (option == -1)
+                       break;
+               switch (option) {
+               case 'i':
+                       infile = optarg;
+                       break;
+               case 'o':
+                       outdir = optarg;
+                       outdirlen = strlen(outdir);
+                       if (outdir[outdirlen - 1] != '/') {
+                               outdir = malloc(outdirlen + 2);
+                               memcpy(outdir, optarg, outdirlen);
+                               outdir[outdirlen] = '/';
+                               outdir[outdirlen + 1] = '\0';
+                       }
+                       break;
+               case '1':
+                       outsubdirs = 0;
+                       break;
+               }
+       }
+
+       if (optind < argc) {
+               /* Assume it's infile */
+               infile = argv[optind];
+       }
+}
+
+int PrintKeyList(FILE *f, sig *s)
+{
+       int i = 0;
+       while (s) {
+               struct keydata *key = &keys[s->id];
+               fprintf(f, "  %08X %08X\n", key->id1, key->id2);
+               s = s->next;
+               i++;
+       }
+       return i;
+}
+
+void ReadInput() {
+       unsigned char buf[20];
+       int currentkey = -1;
+       
+       fprintf(fpstat,"Importing pass 1 (keys)...\n");
+       while (fread(buf,1,18,fpin) == 18) {
+               if (buf[17] != '\n') continue;
+               if (buf[0] == 'p') {
+                       AddKey(buf+1);
+               }
+       }
+       fprintf(fpstat,"done.\n");
+       fprintf(fpstat,"%d keys imported\n",numkeys);
+
+       rewind(fpin);
+       fprintf(fpstat,"Importing pass 2 (sigs)...\n");
+       while (fread(buf,1,18,fpin) == 18) {
+               if (buf[17] != '\n') continue;
+               if (buf[0] == 'p') {
+                       currentkey = GetKeyById(buf+1);
+                       if (currentkey == -1) {
+                               fprintf(fpstat,"Error finding key in pass 2.\n");
+                               exit(EXIT_FAILURE);
+                       }
+               }
+               if (buf[0] == 's') {
+                               AddSig(GetKeyById(buf+1),currentkey);
+                               if ((numsigs%1000) == 0) {
+                                       fprintf(fpstat,"%d sigs imported...\n",numsigs);
+                                       fflush(fpstat);
+                               }
+               } 
+       }
+       fprintf(fpstat,"done.\n");
+       fprintf(fpstat,"%d sigs imported\n",numsigs);
+}
+
+/* This is intended for later use. As it takes a lot of time for the
+ * signature imports, this will save time for future runs of the program
+ * with the same data set. */
+
+void SaveState() {
+       /* not yet implemented. need to figure out how to best handle the
+        * linked lists of sigs first */
+}
+
+int dfsnum[MAXKEYS];
+int lownum[MAXKEYS];
+int removed[MAXKEYS];
+int stack[MAXKEYS];
+int stackindex;
+int lastdfsnum;
+
+void DFSVisit(int id) {
+       sig *psig;
+
+       dfsnum[id] = lownum[id] = ++lastdfsnum;
+       stack[stackindex++] = id;
+
+       for (psig = keys[id].to; psig; psig = psig->next) {
+               int neighbor = psig->id;
+
+               if (removed[neighbor])
+                       continue;
+
+               if (!dfsnum[neighbor]) {
+                       DFSVisit (neighbor);
+
+                       if (lownum[neighbor] < lownum[id])
+                               lownum[id] = lownum[neighbor];
+               } else if (dfsnum[neighbor] < lownum[id])
+                       lownum[id] = dfsnum[neighbor];
+       }
+
+       if (lownum[id] == dfsnum[id]) {
+               int i, size = 0;
+
+               do {
+                       struct keydata *key;
+                       i = stack[--stackindex];
+                       key = &keys[i];
+                       component[i] = id;
+                       removed[i] = 1;
+                       size++;
+                       fprintf(fpsets, "%08X %08X\n", key->id1, key->id2);
+               } while (i != id);
+
+               fprintf(fpsets,
+                       "*** %d keys in this strongly connected set\n\n", size);
+
+               if (max_size < size) {
+                       max_size = size;
+                       max_component = id;
+               }
+       }
+}
+
+void TestConnectivity() {
+       int i;
+
+       for (i = 0; i < numkeys; i++)
+               if (!dfsnum[i])
+                       DFSVisit (i);
+
+       num_reachable = DFSMarkConnected (reachable, max_component);
+
+       fprintf(fpstat,"reachable set is size %d\n", num_reachable);
+       fprintf(fpstat,"strongly connected set is size %d\n", max_size);
+}
+
+/* ################################################################# */
+/* report functions, sort of top level */
+
+void IndivReport(FILE *fp,int key) {
+       int totalsigsto, totalsigsfrom;
+
+       /* head of report */
+       fprintf(fp,"KeyID %08X %08X\n\n", keys[key].id1, keys[key].id2);
+
+       fprintf(fp,"This individual key report was generated as part of the monthly keyanalyze\n");
+       fprintf(fp,"report at http://dtype.org/keyanalyze/.\n\n");
+
+       fprintf(fp,"Note: Key signature counts and lists are from a pruned list that only\n");
+       fprintf(fp,"includes keys with signatures other than their own.\n\n");
+
+       fprintf(fp,"Signatures to this key:\n");
+       totalsigsto = PrintKeyList(fp, keys[key].to);
+       fprintf(fp,"Total: %d signatures to this id from this set\n\n",totalsigsto);
+                
+       fprintf(fp,"Signatures from this key:\n");
+       totalsigsfrom = PrintKeyList(fp, keys[key].from);
+       fprintf(fp,"Total: %d signatures from this id to this set\n\n",totalsigsfrom);
+}
+
+/* ################################################################# */
+/* thread routine */
+
+void *thread_slave(void *arg) {
+       int     i,j;
+       float   threadmean;
+       sig     *distant_sigs = NULL;
+       FILE    *fpindiv;
+
+       int             hops[MAXHOPS]; /* array for hop histogram */
+       int             hophigh; /* highest number of hops for this key */
+
+       threadparam data = *(threadparam *)arg;
+
+       for (i=0;i<numkeys;i++) {
+               struct keydata *key = &keys[i];
+               /* do this for all set2 now */
+               if (reachable[i] && ((i%2)==data.threadnum)) {
+                       /* zero out hop histogram */
+                       memset(hops,0,sizeof(int)*MAXHOPS);
+                       hophigh = 0;
+
+                       threadmean = MeanDistance(i,hops,&hophigh,&distant_sigs);
+
+                       pthread_mutex_lock(&mean_l);
+                       meantotal += threadmean;
+                       fprintf(fpmsd,"%08X %08X %8.4f\n"
+                               ,key->id1, key->id2, threadmean);
+                       fflush(fpmsd);
+                       pthread_mutex_unlock(&mean_l);
+
+                       /* individual report */
+                       fpindiv = OpenFileById(key->id2);
+                       IndivReport(fpindiv,i);
+                       fprintf(fpindiv, "This key is %sin the strong set.\n",
+                               component[i] == max_component ? "" : "not ");
+                       fprintf(fpindiv,"Mean distance to this key from strong set: %8.4f\n\n",threadmean);
+                       fprintf(fpindiv,"Breakout by hop count (only from strong set):\n");
+                       for (j=0;j<=hophigh;j++) {
+                               fprintf(fpindiv,"%2d hops: %5d\n",j,hops[j]);
+                       }
+                       if (distant_sigs) {
+                               fprintf(fpindiv,"\nFarthest keys (%d hops):\n", j-1);
+                               PrintKeyList(fpindiv, distant_sigs);
+                               DeleteKeyList(&distant_sigs);
+                       }
+                       fclose(fpindiv);
+               } 
+       }
+       return NULL;
+}
+
+/* ################################################################# */
+/* main() */
+
+int main(int argc, char **argv)
+{
+       pthread_t       *slave0,*slave1;
+       threadparam arg0,arg1;
+       void            *retval;
+
+       ParseArgs(argc, argv);
+       if (OpenFiles()) {
+               fprintf(stderr, "Error opening files.\n");
+               exit(EXIT_FAILURE);
+       }
+       ReadInput();
+       TestConnectivity();
+
+       pthread_mutex_init(&mean_l,NULL);
+       slave0 = (pthread_t *) calloc(1, sizeof(pthread_t));
+       slave1 = (pthread_t *) calloc(1, sizeof(pthread_t));
+       arg0.threadnum = 0;
+       arg1.threadnum = 1;
+
+       if (pthread_create(slave0,NULL,thread_slave,&arg0)) {
+               fprintf(stderr,"Cannot create thread 0.");
+       }
+       if (pthread_create(slave1,NULL,thread_slave,&arg1)) {
+               fprintf(stderr,"Cannot create thread 1.");
+       }
+       pthread_join(*slave0, &retval);
+       pthread_join(*slave1, &retval);
+
+       fprintf(fpout,"Average mean is %9.4f\n",meantotal/num_reachable);
+       /* ReportMostSignatures(); */
+       CloseFiles(); 
+       return 0;
+}