% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/readFasta2.R
\name{readFasta2}
\alias{readFasta2}
\title{Read file of protein sequences in fasta format}
\usage{
readFasta2(
  filename,
  delim = "|",
  databaseSign = c("sp", "tr", "generic", "gi"),
  removeEntries = NULL,
  tableOut = FALSE,
  UniprSep = c("OS=", "OX=", "GN=", "PE=", "SV="),
  cleanCols = TRUE,
  silent = FALSE,
  callFrom = NULL,
  debug = FALSE
)
}
\arguments{
\item{filename}{(character) name of fasta-file to be read}

\item{delim}{(character) delimiter at header-line}

\item{databaseSign}{(character) characters at beginning right after the '>' (typically specifying the data-base-origin), they will be excluded from the sequence-header}

\item{removeEntries}{(character) set to \code{'empty'} to remove entries without any sequence; set to \code{'duplicated'} to remove duplicate entries (same sequence and same header)}

\item{tableOut}{(logical) toggle to return named character-vector or matrix with enhanced parsing of fasta-header. The resulting matrix will contain the columns 'database','uniqueIdentifier','entryName','proteinName','sequence' and further columns depending on argument \code{UniprSep}}

\item{UniprSep}{(character) separators for further separating entry-fields if \code{tableOut=TRUE}, see also \href{https://www.uniprot.org/help/fasta-headers}{UniProt-FASTA-headers}}

\item{cleanCols}{(logical) remove columns with all entries NA, if \code{tableOut=TRUE}}

\item{silent}{(logical) suppress messages}

\item{callFrom}{(character) allows easier tracking of messages produced}

\item{debug}{(logical) supplemental messages for debugging}
}
\value{
This function returns (depending on parameter \code{tableOut}) a) a simple character vector (of sequences) with UniProt ID as name or b) a matrix with columns: 'database','uniqueIdentifier','entryName','proteinName','sequence' and further columns depending on argument \code{UniprSep}
}
\description{
Read fasta formatted file (from \href{https://www.uniprot.org}{UniProt}) to extract (protein) sequences and names.
If \code{tableOut=TRUE} output may be organized as matrix for separating meta-annotation (eg uniqueIdentifier, entryName, proteinName, GN) in separate columns.
}
\examples{
## Tiny example with common contaminants
path1 <- system.file('extdata',package='wrProteo')
fiNa <-  "conta1.fasta.gz"
fasta1 <- readFasta2(file.path(path1,fiNa))
## now let's read and further separate annotation-fields
fasta2 <- readFasta2(file.path(path1,fiNa),tableOut=TRUE)
str(fasta1)
}
\seealso{
\code{\link{writeFasta2}} for writing as fasta; for reading see also \code{\link[base]{scan}} or \code{read.fasta} from the package \href{https://CRAN.R-project.org/package=seqinr}{seqinr}
}
