pktools 2.6.7
Processing Kernel for geospatial data
pkstatogr.cc
1/**********************************************************************
2pkstatogr.cc: program to calculate basic statistics from vector file
3Copyright (C) 2008-2012 Pieter Kempeneers
4
5This file is part of pktools
6
7pktools is free software: you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
11
12pktools is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with pktools. If not, see <http://www.gnu.org/licenses/>.
19***********************************************************************/
20#include <math.h>
21#include <string>
22#include <fstream>
23#include <assert.h>
24#include "base/Optionpk.h"
25#include "imageclasses/ImgReaderOgr.h"
26#include "algorithms/StatFactory.h"
27/******************************************************************************/
75using namespace std;
76
77int main(int argc, char *argv[])
78{
79 Optionpk<string> input_opt("i", "input", "Input OGR vector file", "");
80 Optionpk<string> layer_opt("ln", "lname", "Layer name(s) in sample (leave empty to select all)");
81 Optionpk<string> fieldname_opt("n", "fname", "Fields on which to calculate statistics", "");
82 Optionpk<double> nodata_opt("nodata","nodata","Set nodata value(s)");
83 Optionpk<double> src_min_opt("src_min","src_min","Set minimum value for histogram");
84 Optionpk<double> src_max_opt("src_max","src_max","Set maximum value for histogram");
85 Optionpk<bool> size_opt("s","size","Sample size (number of points)",false);
86 Optionpk<bool> minmax_opt("mm","minmax","Calculate minimum and maximum value",false);
87 Optionpk<bool> min_opt("min","min","Calculate minimum value",0);
88 Optionpk<bool> max_opt("max","max","Calculate maximum value",0);
89 Optionpk<bool> mean_opt("mean","mean","Calculate mean value",false);
90 Optionpk<bool> median_opt("median","median","Calculate median value",false);
91 Optionpk<bool> stdev_opt("stdev","stdev","Calculate standard deviation",false);
92 Optionpk<bool> histogram_opt("hist","hist","Calculate histogram",false);
93 Optionpk<unsigned int> nbin_opt("nbin", "nbin", "Number of bins");
94 Optionpk<bool> relative_opt("rel","relative","Use percentiles for histogram to calculate histogram",false);
95 Optionpk<bool> kde_opt("kde","kde","Use Kernel density estimation when producing histogram. The standard deviation is estimated based on Silverman's rule of thumb",false);
96 Optionpk<short> verbose_opt("v", "verbose", "Verbose level", 0,2);
97
98 bool doProcess;//stop process when program was invoked with help option (-h --help)
99 try{
100 doProcess=input_opt.retrieveOption(argc,argv);
101 fieldname_opt.retrieveOption(argc,argv);
102 layer_opt.retrieveOption(argc,argv);
103 nodata_opt.retrieveOption(argc,argv);
104 src_min_opt.retrieveOption(argc,argv);
105 src_max_opt.retrieveOption(argc,argv);
106 size_opt.retrieveOption(argc,argv);
107 minmax_opt.retrieveOption(argc,argv);
108 min_opt.retrieveOption(argc,argv);
109 max_opt.retrieveOption(argc,argv);
110 mean_opt.retrieveOption(argc,argv);
111 median_opt.retrieveOption(argc,argv);
112 stdev_opt.retrieveOption(argc,argv);
113 histogram_opt.retrieveOption(argc,argv);
114 nbin_opt.retrieveOption(argc,argv);
115 relative_opt.retrieveOption(argc,argv);
116 kde_opt.retrieveOption(argc,argv);
117 verbose_opt.retrieveOption(argc,argv);
118 }
119 catch(string predefinedString){
120 cout << predefinedString << endl;
121 exit(0);
122 }
123 if(!doProcess){
124 cout << endl;
125 cout << "Usage: pkstatogr -i input [-n attribute]*" << endl;
126 cout << endl;
127 cout << "short option -h shows basic options only, use long option --help to show all options" << endl;
128 exit(0);//help was invoked, stop processing
129 }
130
131 ImgReaderOgr imgReader;
132 try{
133 imgReader.open(input_opt[0]);
134 }
135 catch(string errorstring){
136 cerr << errorstring << endl;
137 }
138
139 ImgReaderOgr inputReader(input_opt[0]);
140 vector<double> theData;
142 //todo: implement ALL
143
144 stat.setNoDataValues(nodata_opt);
145
146 //support multiple layers
147 int nlayerRead=inputReader.getDataSource()->GetLayerCount();
148 if(verbose_opt[0])
149 cout << "number of layers: " << nlayerRead << endl;
150
151 for(int ilayer=0;ilayer<nlayerRead;++ilayer){
152 OGRLayer *readLayer=inputReader.getLayer(ilayer);
153 string currentLayername=readLayer->GetName();
154 if(layer_opt.size())
155 if(find(layer_opt.begin(),layer_opt.end(),currentLayername)==layer_opt.end())
156 continue;
157 if(verbose_opt[0])
158 cout << "processing layer " << currentLayername << endl;
159 if(layer_opt.size())
160 cout << " --lname " << currentLayername;
161
162 for(int ifield=0;ifield<fieldname_opt.size();++ifield){
163 if(verbose_opt[0])
164 cout << "field: " << ifield << endl;
165 theData.clear();
166 inputReader.readData(theData,OFTReal,fieldname_opt[ifield],ilayer,verbose_opt[0]);
167 vector<double> binData;
168 double minValue=0;
169 double maxValue=0;
170 stat.minmax(theData,theData.begin(),theData.end(),minValue,maxValue);
171 if(src_min_opt.size())
172 minValue=src_min_opt[0];
173 if(src_max_opt.size())
174 maxValue=src_max_opt[0];
175 unsigned int nbin=(nbin_opt.size())? nbin_opt[0]:0;
176
177 if(histogram_opt[0]){
178 double sigma=0;
179 if(kde_opt[0]){
180 // if(kde_opt[0]>0)
181 // sigma=kde_opt[0];
182 // else
183 sigma=1.06*sqrt(stat.var(theData))*pow(theData.size(),-0.2);
184 }
185 if(nbin<1)
186 nbin=(maxValue-minValue+1);
187 try{
188 stat.distribution(theData,theData.begin(),theData.end(),binData,nbin,minValue,maxValue,sigma);
189 }
190 catch(string theError){
191 cerr << "Warning: all identical values in data" << endl;
192 exit(1);
193 }
194 }
195 // int nbin=(nbin_opt[0]>1)? nbin_opt[0] : 2;
196 cout << " --fname " << fieldname_opt[ifield];
197 try{
198 double theMean=0;
199 double theVar=0;
200 stat.meanVar(theData,theMean,theVar);
201 if(mean_opt[0])
202 cout << " --mean " << theMean;
203 if(stdev_opt[0])
204 cout << " --stdev " << sqrt(theVar);
205 if(minmax_opt[0]||min_opt[0]||max_opt[0]){
206 if(minmax_opt[0])
207 cout << " --min " << minValue << " --max " << maxValue << " ";
208 else{
209 if(min_opt[0])
210 cout << " --min " << minValue << " ";
211 if(max_opt[0])
212 cout << " --max " << maxValue << " ";
213 }
214 }
215 if(median_opt[0])
216 cout << " -median " << stat.median(theData);
217 if(size_opt[0])
218 cout << " -size " << theData.size();
219 cout << endl;
220 if(histogram_opt[0]){
221 for(int ibin=0;ibin<nbin;++ibin){
222 double binValue=0;
223 if(nbin==maxValue-minValue+1)
224 binValue=minValue+ibin;
225 else
226 binValue=minValue+static_cast<double>(maxValue-minValue)*(ibin+0.5)/nbin;
227 cout << binValue << " ";
228 if(relative_opt[0]||kde_opt[0])
229 cout << 100.0*static_cast<double>(binData[ibin])/theData.size() << endl;
230 else
231 cout << binData[ibin] << endl;
232 }
233 }
234 }
235 catch(string theError){
236 if(mean_opt[0])
237 cout << " --mean " << theData.back();
238 if(stdev_opt[0])
239 cout << " --stdev " << "0";
240 if(min_opt[0])
241 cout << " -min " << theData.back();
242 if(max_opt[0])
243 cout << " -max " << theData.back();
244 if(median_opt[0])
245 cout << " -median " << theData.back();
246 if(size_opt[0])
247 cout << " -size " << theData.size();
248 cout << endl;
249 cerr << "Warning: all identical values in data" << endl;
250 }
251 }
252 }
253 imgReader.close();
254}
255