-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsearch.h
101 lines (87 loc) · 3.45 KB
/
search.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// -*- c++ -*-
/**
*
* LightStringGraph
*
* Lightweight String Graph Construction.
*
* Copyright (C) 2013, 2014 Stefano Beretta, Yuri Pirola, Marco Previtali
*
* Distributed under the terms of the GNU General Public License (or the Lesser
* GPL).
*
* This file is part of LightStringGraph.
*
* LightStringGraph is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* LightStringGraph is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with LightStringGraph. If not, see <http://www.gnu.org/licenses/>.
*
**/
#ifndef SEARCH_H
#define SEARCH_H
#include <algorithm>
#include <vector>
#include <deque>
#include <stack>
#include "types.h"
#include "util.h"
#include "BWTReader.h"
#include "BWTIterator.h"
#include "GSAIterator.h"
#include "LCPIterator.h"
#include "EndPosManager.h"
#include "interval_manager.h"
#include "arcInterval.h"
#include "extend_symbol_pile.h"
#include "edgeLabelIntervalManager.h"
#include "MultiIntervalManager.h"
#include "MultiFileManager.h"
using std::vector;
using std::deque;
// Count the occurrences of characters lexicographically smaller than base.
//
// Parameters:
//   occ  - vector of NucleoCounter values the count is derived from
//          (presumably one counter per alphabet symbol -- TODO confirm
//          against the definition).
//   base - the symbol whose strictly smaller symbols are counted.
// Returns the resulting count as a BWTPosition.
//
// NOTE(review): occ is taken by non-const reference, unlike the
// `const vector< NucleoCounter >& C` parameters of the other functions
// declared in this header; whether the definition modifies it is not
// visible from this declaration.
BWTPosition OccLT( vector< NucleoCounter >& occ, Nucleotide base );
// Build all the basic arc intervals of length t > \tau with a single pass
// over the LCP file.
// Input: BWT, LCP, GSA, \tau
// Output: basic arc intervals grouped by starting symbols $\sigma$ and
// seed length $l$ (via $max{l}$ "QIntervalManager"s)
//
// Parameters:
//   bwt, lcp, gsa - iterators over the BWT, LCP and GSA inputs listed above.
//   read_length   - a sequence length (presumably the length of the input
//                   reads -- TODO confirm against the definition).
//   tau           - the threshold \tau mentioned above.
//   C             - vector of NucleoCounter values, read-only here.
//   baimgr        - manager that receives the produced basic arc intervals.
//
// NOTE(review): the comment above speaks of "QIntervalManager"s, while the
// output parameter is declared as a BasicArcIntervalManager; the relation
// between the two names is not visible in this header.
// NOTE(review): the meaning of the returned SequenceLength is not stated in
// this header; check the definition before relying on it.
SequenceLength build_basic_arc_intervals( BWTIterator& bwt,
LCPIterator& lcp,
GSAIterator& gsa,
const SequenceLength& read_length,
const SequenceLength& tau,
const vector< NucleoCounter >& C,
BasicArcIntervalManager& baimgr);
// Extend arc intervals and save extension symbols to an ExtendSymbolPile.
//
// Parameters:
//   length     - an integer length (presumably the length of the intervals
//                currently being extended -- TODO confirm).
//   C          - vector of NucleoCounter values, read-only here.
//   br         - reader over the BWT.
//   qmgr       - manager of same-length arc intervals (presumably the
//                intervals to extend -- TODO confirm).
//   newqmgr    - second manager of same-length arc intervals (presumably
//                receives the extended intervals -- TODO confirm).
//   extsym_p   - pile on which the extension symbols are saved.
//   arcmgr     - edge-label interval manager.
//   endpos_mgr - end-position manager.
//   arcsOut    - multi-file output manager (presumably where arcs are
//                written -- TODO confirm).
//
// NOTE(review): only the declaration is visible in this header; the roles
// marked "presumably" are inferred from names and types and must be
// checked against the definition.
void extend_arc_intervals( const int length,
const vector< NucleoCounter >& C,
BWTReader& br,
SameLengthArcIntervalManager& qmgr,
SameLengthArcIntervalManager& newqmgr,
ExtendSymbolPile& extsym_p,
EdgeLabelIntervalManager& arcmgr,
EndPosManager& endpos_mgr,
OutputMultiFileManager& arcsOut);
// Declaration of extend_arc_labels; the definition is not part of this
// header.
//
// NOTE(review): judging only by the name and the parameter types, this
// presumably extends the edge-label intervals held by edgemgr using the
// symbols stored in extsym_p and emits labels through labelOut -- TODO
// confirm against the definition.
//
// Parameters:
//   edgemgr  - edge-label interval manager.
//   extsym_p - pile of extension symbols.
//   C        - vector of NucleoCounter values, read-only here.
//   br       - reader over the BWT.
//   lcpit    - iterator over the LCP.
//   max_len  - a sequence length (presumably an upper bound on label
//              length -- TODO confirm).
//   labelOut - multi-file output manager.
void extend_arc_labels( EdgeLabelIntervalManager& edgemgr,
ExtendSymbolPile& extsym_p,
const vector< NucleoCounter >& C,
BWTReader& br,
LCPIterator& lcpit,
const SequenceLength max_len,
OutputMultiFileManager& labelOut);
struct EPI_t
{
vector< vector< NucleoCounter > > _occs;
int _next;
EPI_t(int i) : _occs(i, vector< NucleoCounter >(ALPHABET_SIZE, 0)), _next(1) {};
};
#endif