/******************************************************************************/ /* * 1.c -- (partial) solution to assignment 3, exercise 1 * * There are 3 values, that could be modified: * sum_jitter which sum of statistics for each letter to * accept; higher number gives more results * stat_jitter which letter to involve in same set of possible * changes; higher number, more results * words_jitter how many words from dictionary to find in the * text after processing to give less results * * How does it work: it computes statistics of letters and compares with * statistics of usual English texts. Then, it creates sets of letters, * that could be instead of each other and creates permutations of them * depending on stat_jitter. In the next step, if sum of statistics is * less than sum_jitter, it decrypts text and finds words from dictionary. * If is there enough of hits (words_jitter), it writes to stdout the * result. * * Finally, this is a very slow version. There should be some interaction * with user or many more checks against dictionary, but there is no more * time :(. * * Copyright (C) 2005 Jiri Slaby (jirislaby@gmail.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include /** array of dictionary words */ static char *dict[] = { //# include "dict.h" "THE", "AND", " OR ", " A ", NULL }; /*static float perc[] = { 8.05, 1.62, 3.20, 3.65, 12.31, 2.28, 1.61, 5.14, 7.18, 0.1, 0.52, 4.03, 2.25, 7.19, 7.94, 2.29, 0.20, 6.03, 6.59, 9.59, 3.1, 0.93, 2.03, 0.2, 1.88, 0.09 }; */ /** array with usual percentual occurences of letters in text */ static float perc[] = { 8.09, 1.46, 2.61, 4.25, 12.44, 2.16, 1.98, 5.74, 7.33, 0.14, 0.91, 4.21, 2.74, 6.93, 7.7, 1.76, 0.09, 5.64, 6.27, 8.99, 2.91, 1.01, 2.39, 0.12, 2.07, 0.06 }; #define BLOCK (10 * 1024) #define ABCD "ABCDEFGHIJKLMNOPQRSTUVWXYZ" static float sum_jitter = 15.0; static float stat_jitter = 1.5; static unsigned int words_jitter = 2; /** * Function to read whole file into memory. Return value should be freed by * calling free(2) function. * * @param file file to read from * @return memory filled with content of the file */ static char *readfile(FILE *file) { unsigned int len = BLOCK * 10; char *tmp, *in1, *in = malloc(len); size_t r; if (in == NULL) { perror("malloc in readfile"); goto end; } tmp = in; while ((r = fread(tmp, 1, BLOCK, file)) > 0) { tmp[r] = 0; tmp += r; if (tmp - in + BLOCK >= len) { len += BLOCK * 10; in1 = realloc(in, len); if (in1 == NULL) { perror("realloc in readfile"); goto errfrin; } tmp = in1 + (tmp - in); in = in1; } } if (!ferror(file)) goto end; fprintf(stderr, "read in readfile failed!\n"); errfrin: free(in); return NULL; end: return in; } /** * Compute percentual statistics from first 5000 characters of input. * * @param str input text, which use for computing statistics * @return array for each letter with relatives */ static float *compute_stats(const char *str) { unsigned int a, len = 0, cnt[26]; float *res = calloc(1, 26 * sizeof(float)); if (res == NULL) { perror("calloc in compute_stats"); goto end; } memset(&cnt, 0, 26 * sizeof(unsigned int)); while (*str) { if (*str >= 'A' && *str <= 'Z') { cnt[*str - 'A']++; len++; } if (len > 5000) break; str++; } if (!len) goto end; for (a = 0; a < 26; a++) if (cnt[a]) res[a] = 100.0 * cnt[a] / len; end: return res; } /** * Prints stats to stdout in readable form. * * @param stats stats to print * @param p permutation to consider */ static void print_stats(const float *stats, const char *p) { /* unsigned int a, b; puts(" stat norm diff"); for (a = 0; a < 26; a++) { b = p[a] - 'A'; fprintf(stderr, "%c %6.3f%7.2f%8.3f\n", a + 'A', stats[b], perc[a], fabs(stats[b] - perc[a])); }*/ } /** * Sum the statistics. * Complexity: n * * @param stats statistics to compute from * @param p permutation of letter, which have to be considered in computing * @return sum of statistics */ static float sum_stats(const float *stats, const char *p) { float sum = 0.0; unsigned int a; for (a = 0; a < 26; a++) sum += fabs(stats[p[a] - 'A'] - perc[a]); return sum; } static void list_print(unsigned char *list) { unsigned int a, b; for (a = 0; a < 26; a++) { fprintf(stderr, "|%c (%2d)|", a + 'A', list[a * 27]); for (b = 0; b < 26; b++) if (list[a * 27 + b + 1]) fprintf(stderr, "->%c", b + 'A'); fprintf(stderr, "\n"); } } /** * Creates list of both-ways catenated arrays containing letters that could be * considered to be changed to some other * Complexity: n^2 * * @param stats statictics of letters in text * @return list of arrays */ static void approx_list(unsigned char *list, const float *stats) { unsigned int a, b; for (a = 0; a < 26; a++) for (b = 0; b < 26; b++) if (fabs(stats[(a + b) % 26] - perc[a]) < stat_jitter) { list[a * 27 + (a + b) % 26 + 1] = 1; list[a * 27]++; } list_print(list); } /** * Change all input text depending on permutation. * * @param in text to change * @param perm permutation to consider */ static void change_text(char *in, const char *perm) { unsigned int a, b; char rev[26]; for (a = 0; a < 26; a++) { /* compute reverse to perm */ for (b = 0; b < 26; b++) if (perm[b] - 'A' == a) break; rev[a] = b + 'A'; } while (*in) { if (*in >= 'A' && *in <= 'Z') *in = rev[*in - 'A']; in++; } } /** * Find words in text and if words_jitter is satisfied, return 1. * * @param text where to find * @return 1 if enough of words were found */ static int find_words(char *text) { char **word; unsigned int count = 0, a = 0; for (word = dict; *word; word++, a++) if (strstr(text, *word)) if (++count > words_jitter) return 1; return 0; } /** * Computes sum of stats for this permutation and tries, if are there enough of * words * * @param st statistics to consider * @param in text to decrypt * @param text permutation * @return -1 on error, 0=OK */ static inline int try_this(const float *st, const char *in, const char *text) { char *in1; if (sum_stats(st, text) < sum_jitter) { in1 = malloc(501); if (in1 == NULL) { perror("malloc in try_this"); goto end; } memcpy(in1, in, 500); in1[500] = 0; change_text(in1, text); if (find_words(in1)) { printf("%s: ", text); puts(in1); } free(in1); } return 0; end: return -1; } /** * Makes permutations and calls functions to test them. * * @param st statictics of letters * @return status, 0=OK */ static int permute(const float *st, const char *in) { unsigned int a, b; unsigned char *pos, *firsts, *let, *list, *tmp; char text[27]; tmp = malloc(26 + 26 + 26 + 26 * 27); if (tmp == NULL) { perror("malloc in permute"); return -1; } pos = tmp; firsts = tmp + 26; let = tmp + 26 + 26; list = tmp + 26 + 26 + 26; approx_list(list, st); for (a = 0; a < 26; a++) { /* check the list */ if (list[a * 27] == 0) { fprintf(stderr, "No results. Try to adjust jitter " "values!\n"); goto end; } for (b = 1; b <= 26; b++) if (list[a * 27 + b]) break; pos[a] = firsts[a] = b; } memset(text, 0, 27); memset(let, 0, 26); a = 0; while (1) { while (1) { for (b = pos[a]; b <= 26; b++) if (list[a * 27 + b] && !let[b - 1]) break; if (b > 26) { if (text[a]) { let[text[a] - 'A'] = 0; text[a] = 0; } pos[a] = firsts[a]; if (!a--) goto end; pos[a]++; let[text[a] - 'A'] = 0; text[a] = 0; continue; } pos[a] = b; text[a] = b + 'A' - 1; let[b - 1]++; if (++a > 25) break; } if (try_this(st, in, text)) goto end; a = 24; let[text[25] - 'A'] = 0; let[text[24] - 'A'] = 0; pos[25] = firsts[25]; pos[24]++; text[24] = text[25] = 0; } end: free(tmp); return 0; } /* * Function to prin usage of program. * * @param name name of the executable */ static void print_usage(char *name) { puts("codebreaker v. 0.01"); puts("\tyou need to add name of a file to decrypt:"); printf("\t%s \n", name); puts("\tif you have permutation of letters, you want this:"); printf("\t%s \n", name); } /* * Main function called by lower _main one. * * @param argc number of args * @param argv args * @return command-line return code */ int main(int argc, char **argv) { float *st; char *in, *perm; if (argc != 2 && argc != 3) print_usage(*argv); perm = strdup(ABCD); if (perm == NULL) goto end; FILE *file = fopen(argv[1], "r"); if (file == NULL) goto errfrpe; in = readfile(file); fclose(file); if (in == NULL) goto errfrpe; st = compute_stats(in); if (st == NULL) goto errfrin; print_stats(st, perm); if (argc == 2) permute(st, in); else { change_text(in, argv[2]); puts(in); } free(st); errfrin: free(in); errfrpe: free(perm); end: return 0; }