% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataHandling.R
\name{tradesCleanupUsingQuotes}
\alias{tradesCleanupUsingQuotes}
\title{Perform a final cleaning procedure on trade data}
\usage{
tradesCleanupUsingQuotes(
  tradeDataSource = NULL,
  quoteDataSource = NULL,
  dataDestination = NULL,
  tData = NULL,
  qData = NULL,
  lagQuotes = 0,
  nSpreads = 1,
  BFM = FALSE,
  backwardsWindow = 3600,
  forwardsWindow = 0.5,
  plot = FALSE
)
}
\arguments{
\item{tradeDataSource}{character indicating the folder in which the original trade data is stored.}

\item{quoteDataSource}{character indicating the folder in which the original quote data is stored.}

\item{dataDestination}{character indicating the folder in which the cleaned data is stored, folder of \code{dataSource} by default.}

\item{tData}{\code{data.table} or \code{xts} object containing trade data cleaned by \code{\link{tradesCleanup}}. This argument is \code{NULL} by default. Supplying it means the arguments
\code{tradeDataSource}, \code{quoteDataSource} and \code{dataDestination} will be ignored (only advisable for small chunks of data).}

\item{qData}{\code{data.table} or \code{xts} object containing cleaned quote data. This argument is \code{NULL} by default. Supplying it means the arguments
\code{tradeDataSource}, \code{quoteDataSource} and \code{dataDestination} will be ignored (only advisable for small chunks of data).}

\item{lagQuotes}{numeric, number of seconds the quotes are registered faster than
the trades (should be round and positive). Default is 0. For older datasets, i.e. before 2010, it may be a good idea to set this to, e.g., 2 (see Vergote, 2005).}

\item{nSpreads}{numeric of length 1 denoting how far above the offer and below the bid we allow outliers to be. Trades are filtered out if they are \emph{more than} \code{nSpreads * spread} above (below) the offer (bid).}

\item{BFM}{a logical determining whether to conduct "Backwards - Forwards matching" of trades and quotes.
The algorithm tries to match trades that fall outside the bid-ask spread: it first tries to match within a small window forwards and, if this fails, it tries to match backwards in a bigger window.
The small window is a tolerance for inaccuracies in the timestamps of bids and asks. The backwards window allows for matching of late reported trades, i.e. block trades.}

\item{backwardsWindow}{a numeric denoting the length of the backwards window used when \code{BFM = TRUE}. Default is 3600, corresponding to one hour.}

\item{forwardsWindow}{a numeric denoting the length of the forwards window used when \code{BFM = TRUE}. Default is 0.5, corresponding to one half second.}

\item{plot}{a logical denoting whether to visualize the forwards, backwards, and unmatched trades in a plot. Passed on to \code{\link{rmTradeOutliersUsingQuotes}}.}
}
\value{
For each day an \code{xts} object is saved into the folder of that date, containing the cleaned data.
}
\description{
Function performs the cleaning procedure \code{\link{rmTradeOutliersUsingQuotes}}
on the trade data of all stocks in \code{tradeDataSource}.
Note that the input data for this function should preferably
be trade and quote data cleaned by, e.g., \code{\link{tradesCleanup}}
and \code{\link{quotesCleanup}}, respectively.
}
\details{
In case you supply the arguments \code{tData} and \code{qData}, the on-disk functionality is ignored
and the function returns cleaned trades as a \code{data.table} or \code{xts} object (see examples).

When using the on-disk functionality and \code{tradeDataSource} and \code{quoteDataSource} are the same folder, all files in that folder whose names contain 'quote' are treated as quote files, and the remaining files are treated as containing trade data.
}
\examples{
# Consider you have raw trade data for 1 stock for 2 days 
tDataAfterFirstCleaning <- tradesCleanup(tDataRaw = sampleTDataRaw, 
                                          exchanges = "N", report = FALSE)
qData <- quotesCleanup(qDataRaw = sampleQDataRaw, 
                       exchanges = "N", report = FALSE)
dim(tDataAfterFirstCleaning)
tDataAfterFinalCleaning <- 
  tradesCleanupUsingQuotes(qData = qData[as.Date(DT) == "2018-01-02"],
                           tData = tDataAfterFirstCleaning[as.Date(DT) == "2018-01-02"])
dim(tDataAfterFinalCleaning)
# In case you have more data it is advised to use the on-disk functionality
# via the "tradeDataSource", "quoteDataSource", and "dataDestination" arguments
}
\references{
Barndorff-Nielsen, O. E., Hansen, P. R., Lunde, A., and Shephard, N. (2009). Realized kernels in practice: Trades and quotes. \emph{Econometrics Journal}, 12, C1-C32.

Brownlees, C.T., and Gallo, G.M. (2006). Financial econometric analysis at ultra-high frequency: Data handling concerns. \emph{Computational Statistics & Data Analysis}, 51, 2232-2245.

Christensen, K., Oomen, R. C. A., and Podolskij, M. (2014). Fact or friction: Jumps at ultra high frequency. \emph{Journal of Financial Economics}, 114, 576-599.
}
\author{
Jonathan Cornelissen, Kris Boudt, Onno Kleen, and Emil Sjoerup.
}
\keyword{cleaning}
