pan13-poster/poster.tex

   1 \documentclass[a0,portrait]{sciposter}\r
   2 \r
   3 \usepackage{epsfig}\r
   4 \usepackage{amsmath}\r
   5 \usepackage{amssymb}\r
   6 \usepackage{multicol}\r
   7 \usepackage{bera}\r
   8 \usepackage[utf8]{inputenc}\r
   9 %\usepackage{fancybullets}\r
  10 %\usepackage{floatflt}\r
  11 %\usepackage{graphics}\r
  12 \r
  13 \definecolor{BoxCol}{rgb}{0.9,0.9,1}\r
  14 % BoxCol is a light blue (RGB 0.9, 0.9, 1), meant as the background of \section boxes;\r
  15 % for use with the default option boxedsections.\r
  16 \r
  17 \definecolor{SectionCol}{rgb}{0,0,0.5}\r
  18 % SectionCol is a dark blue (RGB 0, 0, 0.5) for \section text; \cemph and \lemph below use it as well.\r
  19 \r
     % ReallyEmph is a dark red (RGB 0.7, 0, 0); no other line of this file refers to it.\r
  20 \definecolor{ReallyEmph}{rgb}{0.7,0,0}\r
  21 \r
  22 \renewcommand{\titlesize}{\Huge}\r
     % The title is printed at \titlesize (redefined to \Huge above); \\ forces the break between its two lines.\r
  23 \title{Diverse Queries and Feature Type Selection \\ for Plagiarism Discovery}\r
  24 \r
  25 % Author names only; the institute is given separately through \institute.\r
  26 \author{Šimon Suchomel, Jan Kasprzak, and Michal Brandejs}\r
  27  \r
  28 % Institute line; the header code in the document body prints it via \theinstitute.\r
  29 \institute{Faculty of Informatics, Masaryk University, Brno, Czech Republic}\r
  30 \r
  31 % \email{kas@fi.muni.cz}  % if enabled, shows the author e-mail address below the institute\r
  32 \r
  33 % \date is unused by the current \maketitle (which this file does not call: see the commented-out \maketitle in the body)\r
  34 \r
     % \logofont: the font fi-logo600 scaled to 0.16\textwidth; the page header typesets the\r
     % letters "SL" in it (presumably logo glyphs -- confirm against the font itself).\r
  35 \font\logofont=fi-logo600 at .16\textwidth\r
  36 \r
     % \sectionsize is redefined to \Large (assumed to be the class's hook for section heading size -- confirm).\r
  37 \renewcommand{\sectionsize}{\Large}\r
  38 \r
     % \cemph{text}: sans-serif bold italic text coloured with SectionCol.\r
     % \lemph{text}: roman italic text coloured with SectionCol (not used elsewhere in this file).\r
     % \eitem{text}: an \item whose argument is set with \cemph (not used elsewhere in this file).\r
  39 \newcommand{\cemph}[1]{{\sffamily\bfseries\itshape \textcolor{SectionCol}{#1}}}\r
  40 \newcommand{\lemph}[1]{{\rmfamily\itshape \textcolor{SectionCol}{#1}}}\r
  41 \newcommand{\eitem}[1]{\item \cemph{#1}}\r
  42 \r
     % ytemize: an itemize list with \itemsep and \parskip both set to 0pt inside the list.\r
     % The body uses it for the bullet lists that describe the three query types.\r
  43 \newenvironment{ytemize}\r
  44   { \begin{itemize}\r
  45         \setlength{\itemsep}{0pt}\r
  46         \setlength{\parskip}{0pt}\r
  47   }\r
  48   { \end{itemize} }\r
  49 \r
     % Conference line (name in bold, dates, venue) passed to the \conference command.\r
  50 \conference{{\bf CLEF 2013}, 23--27 September 2013, Valencia, Spain}\r
  51 \r
     % \figbotskip is set to \smallskipamount; presumably the gap below figures -- confirm in the sciposter documentation.\r
  52 \setlength{\figbotskip}{\smallskipamount}\r
  53 \r
  54 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\r
  55 %%% Begin of Document\r
  56 \r
  57 \begin{document}\r
  58 \r
  59 \r
  60 %\LEFTSIDEfootlogo  \r
  61 % Uncomment to put footer logo on left side, and \r
  62 % conference name on right side of footer\r
  63 \r
  64 % Some examples of caption control (remove % to check result)\r
  65 \r
  66 %\renewcommand{\algorithmname}{Algoritme} % for Dutch\r
  67 \r
  68 %\renewcommand{\mastercapstartstyle}[1]{\textit{\textbf{#1}}}\r
  69 %\renewcommand{\algcapstartstyle}[1]{\textsc{\textbf{#1}}}\r
  70 %\renewcommand{\algcapbodystyle}{\bfseries}\r
  71 %\renewcommand{\thealgorithm}{\Roman{algorithm}}\r
  72 \r
  73 % \maketitle\r
  74 \r
  75 \vspace*{-.06\textwidth}\r
  76 \r
  77 \hbox to \hsize{\r
     % Hand-built poster header (the \maketitle call above is commented out): three\r
     % minipages in one \hsize-wide box, separated by \hfil.\r
     % Left cell: the znak_MU_modry image, drawn three cell-widths wide and pulled back\r
     % into position with negative skips.\r
  78 \begin{minipage}[c]{.11\textwidth}\r
  79         \vspace{-.75\textwidth}\r
  80         \hbox{\hskip -.83\textwidth\includegraphics[width=3\textwidth]{znak_MU_modry}\hskip -\textwidth}\r
  81         \vspace{-\textwidth}\r
  82 \end{minipage}\r
  83 \hfil\r
     % Middle cell: title, authors, institute and, when \printemail is non-empty, the e-mail.\r
     % \@title and \@author contain @, so @ must have letter catcode while these lines are\r
     % read; otherwise TeX sees the control symbol \@ followed by the literal words\r
     % "title" / "author". \makeatletter ... \makeatother brackets exactly this block.\r
  84 \begin{minipage}[c]{.7\textwidth}\r
  85 \begin{center}\makeatletter\r
  86       \renewcommand{\baselinestretch}{2.0}\normalsize\r
  87       {\titlesize \bf \@title}\par\r
  88       \renewcommand{\baselinestretch}{1.0}\normalsize            \r
  89       \vspace{0.4\titleskip}\r
  90       {\authorsize {\bf\@author} \par}\r
  91       {\instsize\r
  92        \vspace{0.2\titleskip}\r
  93        \theinstitute \par\r
  94        \ifthenelse{\equal{\printemail}{}}{%nothing\r
  95          }{%\r
  96          \vspace{0.2\titleskip}\r
  97          \texttt{\printemail}\r
  98          }\r
  99       }\r
  100 \makeatother\end{center}\r
  101 \end{minipage}\r
  102 \hfil\r
     % Right cell: the letters "SL" set in \logofont (the fi-logo600 font loaded in the preamble).\r
  103 \begin{minipage}[c]{.15\textwidth}\r
  104         \hbox to \hsize{\logofont SL\hss}\r
  105 \end{minipage}\r
  106 }\r
 107 \r
 108 \vspace{-.02\textwidth}\r
 109 \r
  110 %%% Begin of Multicols-Environment\r
 111 %\begin{abstract}\r
 112 %{\sffamily\itshape\r
 113 %Nějaký abstrakt.\r
 114 %}\r
 115 %\end{abstract}\r
 116 \r
 117 \r
 118 \begin{multicols}{2}\setlength{\columnseprule}{0pt}\r
 119 \section{Introduction}\r
 120 %\r
  121 PAN 2013 Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum \r
 122 %\r
 123 \vfill\r
 124 \columnbreak\r
 125 %\r
 126 \begin{figure}\r
 127  \centering\r
 128   \includegraphics[width=0.6\textwidth]{img/source_retrieval_process.pdf}\r
 129   \caption{Plagiarism discovery process.}\r
 130   \label{fig:process}\r
 131 \end{figure} \r
 132 \end{multicols}\r
 133 \begin{multicols}{2}\r
 134 %\rm\r
 135 %%% Introduction\r
 136 \section{Querying}\r
  137 Querying means effectively utilizing the search engine in order to retrieve as many relevant\r
  138 documents as possible with the minimum number of queries.\r
  139 %We consider the resulting document relevant if it shares some of its text characteristics with the suspicious document.\r
  140 In the real world, queries as such represent an appreciable cost; therefore, their minimization should be one of the top priorities. \r
  141 %\subsection{Types of Queries}\r
  142 Three diverse types of queries were extracted from the suspicious document.\\\r
 143 \begin{minipage}{0.55\linewidth}\r
 144 \subsection{Keywords Based Queries}\r
 145 \begin{ytemize}\r
  146 \item TF--IDF based automated keyword extraction;\r
 147 \item 5-token long; \r
 148 \item Deterministic;\r
 149 \item Non-positional;\r
 150 \item Non-phrasal.\r
 151 \end{ytemize}\r
 152 \end{minipage}\r
 153 \begin{minipage}{0.45\linewidth}\r
 154 \begin{figure}[h]\r
 155  %\centering\r
 156   \includegraphics[width=1\linewidth]{img/document_keywords.pdf}\r
 157 \end{figure}\r
 158 \end{minipage}\r
 159 \begin{minipage}{0.55\linewidth}\r
 160 \subsection{Intrinsic Plagiarism Based Queries}\r
 161 \begin{ytemize}\r
 162 \item Averaged Word Frequency Class based chunking~\cite{AWFC};\r
 163 \item Random sentence selection from the chunk;\r
 164 \item Non-deterministic;\r
 165 \item Positional;\r
 166 \item Phrasal.\r
 167 \end{ytemize}\r
 168 \end{minipage}\r
 169 \begin{minipage}{0.45\linewidth}\r
 170 \begin{figure}[h]\r
 171  %\centering\r
 172   \includegraphics[width=1\linewidth]{img/document_awfc.pdf}\r
 173 \end{figure}\r
 174 \end{minipage}\r
 175 \begin{minipage}{0.55\linewidth}\r
 176 \subsection{Paragraph Based Queries}\r
 177 \begin{ytemize}\r
 178 \item Longest sentences from miscellaneous paragraphs;\r
 179 \item Deterministic;\r
 180 \item Positional;\r
 181 \item Phrasal.\r
 182 \end{ytemize}\r
 183 \end{minipage}\r
 184 \begin{minipage}{0.45\linewidth}\r
 185 \begin{figure}[h]\r
 186  %\centering\r
 187   \includegraphics[width=1\linewidth]{img/document_paragraphs.pdf}\r
 188 \end{figure}\r
 189 \end{minipage}\r
 190 \r
 191 \begin{figure}[h]\r
 192  \centering\r
 193   \includegraphics[width=0.8\linewidth]{img/queryprocess.pdf}\r
  194    \caption{Stepwise query execution process.}\r
 195 \end{figure}\r
 196 \r
 197 \section{Selecting}\r
  198 Document snippets were used to decide whether to download a document for text alignment.\r
  199 We used a 2-tuple measure, which indicates how many neighbouring word pairs coexist in the snippet and in the suspicious document.\r
  200 The performance of this measure is depicted in Figure~\ref{fig:snippet_graph}.\r
  201 Given this measure, a threshold for the download decision needs to be set in order to maximize the discovered similarities\r
  202 and minimize the total number of downloads.\r
  203 A profitable threshold is one that corresponds to the largest distance between those two curves.\r
 204 \begin{figure}\r
 205   \centering\r
 206   \includegraphics[width=0.8\textwidth]{img/snippets_graph.pdf}\r
 207   \caption{Downloads and similarities performance.}\r
 208   \label{fig:snippet_graph}\r
 209 \end{figure}\r
 210 \r
 211 \r
 212 %\r
 213 % Yenyova cast\r
 214 %\r
 215 \r
 216 \section{Text Alignment}\r
 217 \r
 218 %\r
 219 % Spolecna cast\r
 220 %\r
 221 \r
 222 \section{Conclusion}\r
 223 \r
 224 Nějaký závěr\r
 225 \r
 226 %%% References\r
 227 \r
  228 %% Note: use of BibTeX also works!!\r
 229 \r
 230 \bibliographystyle{plain}\r
 231 \begin{thebibliography}{1}\r
 232 \r
 233 \bibitem{ISMU}\r
 234 \cemph{Masaryk University Information System}\\\r
 235 {\tt http://is.muni.cz/}, contact: {\tt iscor@fi.muni.cz}.\r
 236 \r
 237 \bibitem{Theses}\r
 238 \cemph{Czech National Archive of Graduate Theses}\\\r
 239 {\tt http://theses.cz/}, contact: {\tt theses@fi.muni.cz}.\r
 240 \r
 241 \bibitem{AWFC}\r
  242 \cemph{Sven Meyer zu Eissen and Benno Stein: Intrinsic Plagiarism Detection}\\\r
  243 {\tt Proceedings of the European Conference on Information Retrieval (ECIR-06)}, {\tt 2006}.\r
 244 \r
 245 \end{thebibliography}\r
 246 \r
 247 \smallskip\r
 248 \hrule height .1em\r
 249 \medskip\r
 250 \r
 251 % \sffamily\r
 252 \r
 253 QR kód?\r
 254 \r
 255 \cemph{Contact information:}\\\r
  256         Šimon Suchomel, {\tt suchomel@fi.muni.cz},\\\r
 257         Jan Kasprzak, {\tt kas@fi.muni.cz}.\r
 258 \r
 259 \r
 260 \end{multicols}\r
 261 \r
 262 \end{document}\r
 263 \r