Egglib 2.1.11
C++ library reference manual
NucleotideDiversity.hpp
1 /*
2  Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
3 
4  This file is part of the EggLib library.
5 
6  EggLib is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  EggLib is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with EggLib. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 
21 #ifndef EGGLIB_NUCLEOTIDEDIVERSITY_HPP
22 #define EGGLIB_NUCLEOTIDEDIVERSITY_HPP
23 
24 
25 #include "BaseDiversity.hpp"
26 #include <string>
27 #include <vector>
28 
29 
30 
31 namespace egglib {
32 
33 
87 
88  public:
89 
94 
95 
99  virtual ~NucleotideDiversity();
100 
101 
153  virtual void load(
154  CharMatrix& data,
155  bool allowMultipleMutations=false,
156  double minimumExploitableData=1.,
157  unsigned int ignoreFrequency=0,
158  std::string characterMapping=dnaMapping,
159  bool useZeroAsAncestral=false
160  );
161 
162 
163  // accessors for the "site analysis" section
164 
166  unsigned int S() const;
167 
169  unsigned int So() const;
170 
172  unsigned int eta() const;
173 
175  double nseff() const;
176 
178  unsigned int lseff() const;
179 
181  double nseffo() const;
182 
184  unsigned int lseffo() const;
185 
187  unsigned int npop() const;
188 
190  unsigned int popLabel(unsigned int popIndex) const; // no check!
191 
192 
193  // accessors for the "diversity" section
194 
196  double Pi();
197 
199  double thetaW();
200 
202  double average_Pi();
203 
205  double pop_Pi(unsigned int popIndex); // no check!
206 
208  double D();
209 
210  // accessors for the "outgroup diversity" section
211 
213  double thetaH();
214 
216  double thetaL();
217 
219  double H();
220 
222  double Z();
223 
225  double E();
226 
227  // accessors for the "differentiation" section
228 
230  unsigned int FixedDifferences();
231 
233  unsigned int CommonAlleles();
234 
236  unsigned int SharedAlleles();
237 
239  unsigned int SpecificAlleles();
240 
242  unsigned int SpecificDerivedAlleles();
243 
245  unsigned int Polymorphisms(unsigned int pop);
246 
248  unsigned int SpecificAlleles(unsigned int pop);
249 
251  unsigned int SpecificDerivedAlleles(unsigned int pop);
252 
254  unsigned int FixedDifferences(unsigned int pop1, unsigned int pop2);
255 
257  unsigned int CommonAlleles(unsigned int pop1, unsigned int pop2);
258 
260  unsigned int SharedAlleles(unsigned int pop1, unsigned int pop2);
261 
262 
263  // accessor for the "triConfigurations" section
264 
287  unsigned int triConfiguration(unsigned int index);
288 
289 
291  std::vector<unsigned int> polymorphic_positions() const;
292 
293 
302  std::vector<unsigned int> singleton_positions() const;
303 
304 
305  protected:
306 
311 
312 
316  NucleotideDiversity& operator=(const NucleotideDiversity& source) { return *this; } // protected no-op: copies nothing from source, because this class cannot be copied
317 
318 
319  void init(); // initializes values
320  void clear(); // frees memory but doesn't initialize values
321 
322  // diversity (without outgroup)
323  void diversity();
324 
325  // diversity with outgroup
326  void outgroupDiversity();
327 
328  // site patterns
329  void differentiation();
330 
331  // triconfigurations
332  void triConfigurations();
333 
334 
335  // holders for statistics, with booleans flagging groups of stats
336 
337  bool b_analysisSites;
338 
339  bool b_diversity;
340 
341  double v_Pi; // nucleotide diversity
342  double v_thetaW; // theta (Watterson estimator)
343  double v_average_Pi; // average diversity across populations
344  double *v_pop_Pi; // diversity per population
345  double v_D; // Tajima's D
346 
347  bool b_outgroupDiversity;
348 
349  double v_thetaH; // theta (Fay and Wu estimator)
350  double v_thetaL; // theta (Zeng estimator)
351  double v_H; // Fay and Wu's H
352  double v_Z; // normalized Fay and Wu's H
353  double v_E; // Zeng et al.'s E
354 
355  bool b_differentiation;
356 
357  unsigned int *v_pairwiseFixedDifferences;
358  unsigned int *v_pairwiseCommonAlleles;
359  unsigned int *v_pairwiseSharedAlleles;
360  unsigned int *v_popPolymorphic;
361  unsigned int *v_popSpecific;
362  unsigned int *v_popSpecificDerived;
363  unsigned int v_countFixedDifferences;
364  unsigned int v_countCommonAlleles;
365  unsigned int v_countSharedAlleles;
366  unsigned int v_countSpecificAlleles;
367  unsigned int v_countSpecificDerivedAlleles;
368 
369 
370  bool b_triConfigurations;
371 
372  unsigned int *v_triConfigurations;
373 
374  };
375 }
376 
377 #endif
unsigned int Polymorphisms(unsigned int pop)
Number of polymorphisms in a given population (unsecure)
Definition: NucleotideDiversity.cpp:582
unsigned int triConfiguration(unsigned int index)
Number falling into one of the possible site configurations.
Definition: NucleotideDiversity.cpp:648
std::vector< unsigned int > polymorphic_positions() const
Builds and returns the vector of positions of all polymorphic sites.
Definition: NucleotideDiversity.cpp:656
Base class of diversity classes.
Definition: BaseDiversity.hpp:60
double thetaH()
Fay and Wu estimator of theta.
Definition: NucleotideDiversity.cpp:544
unsigned int popLabel(unsigned int popIndex) const
Label of the population with the given index (unsecure: the index is not checked)
Definition: NucleotideDiversity.cpp:491
std::vector< unsigned int > singleton_positions() const
Builds and returns the vector of positions of all singleton sites.
Definition: NucleotideDiversity.cpp:661
unsigned int lseffo() const
Number of orientable sites.
Definition: NucleotideDiversity.cpp:481
virtual ~NucleotideDiversity()
Destroys an object.
Definition: NucleotideDiversity.cpp:37
unsigned int SpecificAlleles()
Number of sites with at least one allele specific to one population.
Definition: NucleotideDiversity.cpp:572
double Pi()
Nucleotide diversity.
Definition: NucleotideDiversity.cpp:501
double average_Pi()
Average of Pi over populations.
Definition: NucleotideDiversity.cpp:511
unsigned int FixedDifferences()
Number of sites with at least one fixed difference.
Definition: NucleotideDiversity.cpp:557
unsigned int lseff() const
Number of sites effectively used.
Definition: NucleotideDiversity.cpp:471
unsigned int So() const
Number of polymorphic orientable sites.
Definition: NucleotideDiversity.cpp:456
double pop_Pi(unsigned int popIndex)
Pi of a given population (unsecure)
Definition: NucleotideDiversity.cpp:516
double thetaW()
Watterson estimator of theta.
Definition: NucleotideDiversity.cpp:506
double Z()
Standardized H.
Definition: NucleotideDiversity.cpp:534
double D()
Tajima's D.
Definition: NucleotideDiversity.cpp:521
double E()
Zeng et al.'s E.
Definition: NucleotideDiversity.cpp:539
unsigned int npop() const
Number of detected populations.
Definition: NucleotideDiversity.cpp:486
NucleotideDiversity(const NucleotideDiversity &source)
This class cannot be copied.
Definition: NucleotideDiversity.hpp:310
virtual void load(CharMatrix &data, bool allowMultipleMutations=false, double minimumExploitableData=1., unsigned int ignoreFrequency=0, std::string characterMapping=dnaMapping, bool useZeroAsAncestral=false)
Identifies polymorphic sites and computes basic statistics.
Definition: NucleotideDiversity.cpp:114
double nseffo() const
Average number of sequences effectively used at orientable sites.
Definition: NucleotideDiversity.cpp:476
double H()
Fay and Wu's H.
Definition: NucleotideDiversity.cpp:529
NucleotideDiversity()
Builds an object.
Definition: NucleotideDiversity.cpp:32
unsigned int S() const
Number of polymorphic sites.
Definition: NucleotideDiversity.cpp:451
NucleotideDiversity & operator=(const NucleotideDiversity &source)
This class cannot be copied.
Definition: NucleotideDiversity.hpp:316
Definition: ABC.cpp:37
unsigned int CommonAlleles()
Number of sites with at least one allele shared among at least two populations.
Definition: NucleotideDiversity.cpp:562
unsigned int eta() const
Minimum number of mutations.
Definition: NucleotideDiversity.cpp:461
double thetaL()
Zeng et al. estimator of theta.
Definition: NucleotideDiversity.cpp:549
static const std::string dnaMapping
Predefined mapping string for DNA data.
Definition: BaseDiversity.hpp:98
double nseff() const
Average of per-site number of sequences effectively used.
Definition: NucleotideDiversity.cpp:466
Performs population genetics analyses.
Definition: NucleotideDiversity.hpp:86
Interface for classes usable as a square matrix of characters.
Definition: CharMatrix.hpp:31
unsigned int SharedAlleles()
Number of sites with at least one non-fixed allele shared among at least two populations.
Definition: NucleotideDiversity.cpp:567
unsigned int SpecificDerivedAlleles()
Number of sites with at least one derived allele specific to one population.
Definition: NucleotideDiversity.cpp:577

Hosted by 
Get EggLib at SourceForge.net. Fast, secure and Free Open Source software downloads