From: Simon Suchomel Date: Wed, 18 Sep 2013 20:43:05 +0000 (+0200) Subject: 1. nastrel X-Git-Tag: 20130920-vytisteno~13 X-Git-Url: https://www.fi.muni.cz/~kas/git//home/kas/public_html/git/?p=pan13-paper.git;a=commitdiff_plain;h=95bbf9a1fc66f175da1daff45dc1601b2a9caa69 1. nastrel --- diff --git a/pan13-paper/pan13-notebook.pdf b/pan13-paper/pan13-notebook.pdf deleted file mode 100644 index 83b37bc..0000000 Binary files a/pan13-paper/pan13-notebook.pdf and /dev/null differ diff --git a/pan13-poster/img/snippets_graph.pdf b/pan13-poster/img/snippets_graph.pdf new file mode 100755 index 0000000..96192d0 Binary files /dev/null and b/pan13-poster/img/snippets_graph.pdf differ diff --git a/pan13-poster/img/source_retrieval_process.pdf b/pan13-poster/img/source_retrieval_process.pdf new file mode 100755 index 0000000..d275b61 Binary files /dev/null and b/pan13-poster/img/source_retrieval_process.pdf differ diff --git a/pan13-poster/poster.tex b/pan13-poster/poster.tex old mode 100644 new mode 100755 index ce9f7a3..c3ab1c5 --- a/pan13-poster/poster.tex +++ b/pan13-poster/poster.tex @@ -18,7 +18,7 @@ \definecolor{ReallyEmph}{rgb}{0.7,0,0} \renewcommand{\titlesize}{\Huge} -\title{Distributed System \\ for Discovering Similar Documents} +\title{Diverse Queries and Feature Type Selection \\ for Plagiarism Discovery} % Note: only give author names, not institute \author{Šimon Suchomel, Jan Kasprzak, and Michal Brandejs} @@ -45,7 +45,7 @@ } { \end{itemize} } -\conference{{\bf ICEIS 2008}, 12--16 June 2008, Barcelona, Spain} +\conference{{\bf CLEF 2013}, 23--27 September 2013, Valencia, Spain} \setlength{\figbotskip}{\smallskipamount} @@ -106,25 +106,78 @@ \vspace{-.02\textwidth} %%% Begin of Multicols-Enviroment -\begin{multicols}{2} +%\begin{abstract} +%{\sffamily\itshape +%Nějaký abstrakt. +%} +%\end{abstract} -\rm -%%% Abstract -\begin{abstract} -{\sffamily\itshape +\begin{multicols}{2}\setlength{\columnseprule}{0pt} -Nějaký abstrakt. 
-} -\end{abstract} +\section{Introduction} +PAN 2013 Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum Lorem ipsum + + + +\begin{figure} + \centering + \includegraphics[width=0.8\textwidth]{img/source_retrieval_process.pdf} + \caption{Plagiarism discovery process.} + \label{fig:process} +\end{figure} + + +\end{multicols} -%%% Introduction -\section{Šimonova část} -\subsection{Kdovíco} -\section{Yenyova část} +\begin{multicols}{2} + +%\rm + +%%% Introduction +\section{Querying} +Querying means to effectively utilize the search engine in order to retrieve as many relevant +documents as possible with the minimum number of queries. +%We consider the resulting document relevant if it shares some text characteristics with the suspicious document. +In the real world, queries as such represent an appreciable cost; therefore, their minimization should be one of the top priorities. \\ +\subsection{Types of Queries} +From the suspicious document, three diverse types of queries were extracted. +\subsubsection{Keywords Based Queries} +\begin{ytemize} +\item TF--IDF based automated keyword extraction; +\item 5-token long; +\item Deterministic; +\item Non-positional; +\item Non-phrasal. +\end{ytemize} +\subsubsection{Intrinsic Plagiarism Based Queries} +\begin{ytemize} +\item Averaged Word Frequency Class based chunking~\cite{AWFC}; +\item Random sentence selection from the chunk; +\item Non-deterministic; +\item Positional; +\item Phrasal. +\end{ytemize} +\subsubsection{Paragraph Based Queries} +\begin{ytemize} +\item Longest sentences from miscellaneous paragraphs; +\item Deterministic; +\item Positional; +\item Phrasal. 
+\end{ytemize} + +\section{Selecting} +\begin{figure} + \centering + \includegraphics[width=0.8\textwidth]{img/snippets_graph.pdf} + \caption{Downloads and similarities performance.} + \label{fig:snippet_graph} +\end{figure} + +\section{Text Alignment} \section{Conclusion} @@ -145,6 +198,10 @@ Nějaký závěr \cemph{Czech National Archive of Graduate Theses}\\ {\tt http://theses.cz/}, contact: {\tt theses@fi.muni.cz}. +\bibitem{AWFC} +\cemph{Sven Meyer Zu Eissen and Benno Stein: Intrinsic Plagiarism Detection}\\ +{\tt Proceedings of the European Conference on Information Retrieval (ECIR-06)}, {\tt 2006} + \end{thebibliography} \smallskip