From 6f9a98fd5a5b037192c8bd2359a7a7d366343268 Mon Sep 17 00:00:00 2001 From: "Jan \"Yenya\" Kasprzak" Date: Fri, 20 Sep 2013 11:34:56 +0200 Subject: [PATCH] Presun na xelatex, barevne subsection, ytemize --- pan13-poster/poster.tex | 48 +++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/pan13-poster/poster.tex b/pan13-poster/poster.tex index 56f1111..e166b91 100755 --- a/pan13-poster/poster.tex +++ b/pan13-poster/poster.tex @@ -4,11 +4,15 @@ \usepackage{amsmath} \usepackage{amssymb} \usepackage{multicol} -\usepackage{bera} \usepackage[utf8]{inputenc} %\usepackage{fancybullets} %\usepackage{floatflt} %\usepackage{graphics} +\usepackage{fontspec} +\usepackage{xunicode} +\setmainfont[Mapping=tex-text]{DejaVu Sans} +\setsansfont[Mapping=tex-text]{DejaVu Sans} +\setmonofont[Mapping=tex-text]{DejaVu Sans Mono} \definecolor{BoxCol}{rgb}{0.9,0.9,1} % uncomment for light blue background to \section boxes @@ -51,6 +55,13 @@ \setlength{\figbotskip}{\smallskipamount} +\renewcommand{\SubSection}[2][?]{ + \vspace{0.5\secskip} + \refstepcounter{subsection} + {\bf \subsectionsize \textcolor{SectionCol}{\arabic{section}.\arabic{subsection}~#2}} + \par\vspace{0.375\secskip} +} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% Begin of Document @@ -125,8 +136,7 @@ input document must be highlighted. This poster presents methodology used during The whole process is depicted at picture~\ref{fig:process}. The source retrieval task is divided into 2 subtasks: Quering and Selecting, during which the software utilizes given search engine. The retrieved sources must be examined in detail in order to highlight as many plagiarism cases as possible. This process is depicted -as Text Alignment. - +as Text Alignment. Results of this process are called {\em detections}, i.e.~passages of {\em source document} and {\em suspicious document}, which are similar enough to each other, and can serve as a basis for further manual examination for possible plagiarism. % \vfill \columnbreak @@ -225,32 +235,32 @@ A profitable threshold is such that matches with the largest distance between th The system uses the same basic principles as in \cite{suchomel_kas_12}: -\begin{itemize} +\begin{ytemize} \item{\cemph{common features} between source and suspicious documents} -\begin{itemize} +\begin{ytemize} \item{word 5-grams} \item{stop-word 8-grams \cite{stamatatos2011plagiarism}} -\end{itemize} +\end{ytemize} \item{\cemph{valid intervals} of characters covered by common features ``densely enough''} \item{\cemph{postprocessing}---remove overlapping detections, join neighbouring detections} -\end{itemize} +\end{ytemize} \subsection{Alternative Features} -\begin{itemize} +\begin{ytemize} \item{\cemph{contextual n-grams} \cite{torrejondetailed}} -\begin{itemize} +\begin{ytemize} \item{\cemph{The quick} brown \cemph{fox jumped} over the lazy dogs.} \item{The \cemph{quick brown} fox \cemph{jumped over} the lazy dogs.} -\end{itemize} +\end{ytemize} \item{plain word 4-grams} -\begin{itemize} +\begin{ytemize} \item{\cemph{The quick brown fox} jumped over the lazy dogs.} \item{The \cemph{quick brown fox jumped} over the lazy dogs.} -\end{itemize} -\end{itemize} +\end{ytemize} +\end{ytemize} \begin{table} @@ -271,11 +281,11 @@ plain 4-grams & \cemph{0.7556} & 0.7340 & 1.0000 & \cemph{0.7447} \\ \subsection{Global Postprocessing} -\begin{itemize} +\begin{ytemize} \item{Similar to PAN 2010 \cite{Kasprzak2010}} \item{Overlapping detections removal} \item{\cemph{Result:} improvement, but not as big as in 2010} -\end{itemize} +\end{ytemize} % % Spolecna cast @@ -285,18 +295,18 @@ plain 4-grams & \cemph{0.7556} & 0.7340 & 1.0000 & \cemph{0.7447} \\ \subsection{Candidate retrieval} -\begin{itemize} +\begin{ytemize} \item{Second best ratio of recall to the number of queries} \item{Missing support for phrasal search in ChatNoir is a big stumbling block} -\end{itemize} +\end{ytemize} \subsection{Text alignment} -\begin{itemize} +\begin{ytemize} \item{Significant improvement against PAN 2013} \item{Word 4-grams are better than contextual 4-grams} \item{We need a better ranking system than plagdet!} -\end{itemize} +\end{ytemize} %%% References -- 2.43.0