Commit 506e81d5 authored by Mohammad Akhlaghi's avatar Mohammad Akhlaghi

Reorganized files, working on statistics

The source files have been separated: with the Make (`.mk') files now
in `reproduce/make' and the scripts (for the time being only AWK) in
`reproduce/scripts'. Also work has started on deriving the statistics
of each field, but is not yet complete (it isn't a prerequisite of the
final PDF for now in this commit).
parent d5828815
......@@ -64,7 +64,7 @@ all: $(BSYM) description.pdf
# Note that we cannot simply include `reproduce/src/*.mk'. Because the
# order of reading them into Make actually matters in some cases.
include $(foreach m, preparations download degrade-hst input-cutouts \
catalog description, reproduce/src/$(m).mk)
catalog statistics description, reproduce/make/$(m).mk)
......
......@@ -173,48 +173,3 @@ $(fullmosaic): $(ccatdir)/udf-%.txt: $$(foreach i, 1 2 3 4 5 6 7 8 9, \
# Put all the non-commented rows of each subfield.
for file in $^; do awk '!/^#/{print}' $$file >> $@; done
# Prepare 2D histograms
# ---------------------
#
# For time being, we are plotting 2D histograms.
twodxmin=20
twodxmax=30
twodxnbins=40
twodymin=-1.8125
twodymax=1.8125
twodynbins=42
twoddir = $(BDIR)/tex/2D-histograms
two-d-hists = $(foreach field, udf udf10, \
$(foreach filter, $(filters), \
$(twoddir)/$(field)-$(filter).txt) )
$(two-d-hists): $(twoddir)/%.txt: $(ccatdir)/%.txt | $(twoddir)
# Pull out the two columns to generate the histogram
awk '!/^#/{print $$5, $$8}' $(ccatdir)/$*.txt > $(twoddir)/$*_fh.txt
# Generate the 2D histogram.
awk -vxmin=$(twodxmin) -vxmax=$(twodxmax) -vymin=$(twodymin) \
-vymax=$(twodymax) -vxnumbins=$(twodxnbins) \
-vynumbins=$(twodynbins) -vcol=8 -vshowemptylast=1 \
-vextraout=0 -i reproduce/src/checks.awk \
-f reproduce/src/2dhist.awk $(twoddir)/$*_fh.txt > $@
# Clean up
rm $(twoddir)/$*_fh.txt
# TeX macros
# ----------
$(mtexdir)/catalog.tex: $(two-d-hists) | $(mtexdir)
echo "\\newcommand{\\twodxmin}{$(twodxmin)}" > $@
echo "\\newcommand{\\twodxmax}{$(twodxmax)}" >> $@
echo "\\newcommand{\\twodxnbins}{$(twodxnbins)}" >> $@
echo "\\newcommand{\\twodynbins}{$(twodynbins)}" >> $@
......@@ -48,7 +48,7 @@ tikz:; mkdir tikz
# do not necessarily exist before Make actually starts (for example
# when the pipeline is run for the first time). You just have to add
# their root filename to the prerequisites list manually here.
tex/pipeline.tex: $(foreach t, versions catalog, $(mtexdir)/$(t).tex)
tex/pipeline.tex: $(foreach t, versions statistics, $(mtexdir)/$(t).tex)
echo "\\newcommand{\\buildtexdir}{$(BDIR)/tex}" > $@
cat $^ >> $@
......
# Broad-band photometry checks with MUSE generated broad-band images.
#
# The rules in this file are for measuring the statistics over each
# field to compare.
#
# Original author:
# Mohammad Akhlaghi <mohammad.akhlaghi@univ-lyon1.fr>
# Contributing author(s):
# Copyright (C) 2016, Mohammad Akhlaghi.
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# A copy of the GNU General Public License is available at
# <http://www.gnu.org/licenses/>.
# Prepare 2D histograms
# ---------------------
#
# For time being, we are plotting 2D histograms.
twodxmin=20
twodxmax=30
twodxnbins=40
twodymin=-1.8125
twodymax=1.8125
twodynbins=42
twoddir = $(BDIR)/tex/2D-histograms
two-d-hists = $(foreach field, udf udf10, \
$(foreach filter, $(filters), \
$(twoddir)/$(field)-$(filter).txt) )
$(two-d-hists): $(twoddir)/%.txt: $(ccatdir)/%.txt \
reproduce/scripts/two-dim-hist.awk \
reproduce/scripts/checks.awk | $(twoddir)
# Pull out the two columns to generate the histogram
awk '!/^#/{print $$5, $$8}' $(ccatdir)/$*.txt > $(twoddir)/$*_fh.txt
# Generate the 2D histogram.
awk -vxmin=$(twodxmin) -vxmax=$(twodxmax) -vymin=$(twodymin) \
-vymax=$(twodymax) -vxnumbins=$(twodxnbins) \
-vynumbins=$(twodynbins) -vcol=8 -vshowemptylast=1 \
-vextraout=0 -i reproduce/scripts/checks.awk \
-f reproduce/scripts/two-dim-hist.awk $(twoddir)/$*_fh.txt > $@
# Clean up
rm $(twoddir)/$*_fh.txt
# Average and dispersion in bins
# ------------------------------
#
# To confirm that there is no problem in the individual fields, we
# also need the average and standard deviation in each bin of each
# field to plot as lines over the 2D histograms.
histxnbins = 15
histstdmultip = 3
statdir = $(BDIR)/statistics
mag-ave-std = $(foreach uid, 1 2 3 4 5 6 7 8 9 10, \
$(foreach f, $(filters), $(statdir)/udf$(uid)-$(f).txt) )
$(mag-ave-std): $(statdir)/%.txt: $(ccatdir)/%.txt \
reproduce/scripts/bin-ave-std.awk | $(statdir)
awk -vxmin=$(twodxmin) -vxmax=$(twodxmax) -vxcol=5 -vycol=8 \
-vxnumbins=$(twodxnbins) -vstdmultip=$(histstdmultip) \
-i reproduce/scripts/checks.awk \
-f reproduce/scripts/bin-ave-std.awk $<
exit 1
# TeX macros
# ----------
$(mtexdir)/statistics.tex: $(two-d-hists) | $(mtexdir)
echo "\\newcommand{\\twodxmin}{$(twodxmin)}" > $@
echo "\\newcommand{\\twodxmax}{$(twodxmax)}" >> $@
echo "\\newcommand{\\twodxnbins}{$(twodxnbins)}" >> $@
echo "\\newcommand{\\twodynbins}{$(twodynbins)}" >> $@
echo "\\newcommand{\\histstdmultip}{$(histstdmultip)}" >> $@
# Average and standard deviation of Y values in X axis bins.
#
# Copyright (C) 2016, Mohammad Akhlaghi <mohammad.akhlaghi@univ-lyon1.fr>
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details. See
# <http://www.gnu.org/licenses/>.
#
#
#
#
# This script goes over the given input column and counts how many of
# the data elements (rows) are within the given range.
#
# Library functions:
# ==================
#
# This program needs two outside functions:
#
# checkifset()
# checkifint()
#
#
# Variables:
# ==========
#
# xmin: Minimum value for the histogram.
# xmax: Maximum value for the histogram.
# xnumbins: Number of bins in the histogram.
# xcol: Column representing X axis.
# ycol: Column representing Y axis.
# showemptylast: If ==1, then show the last bins if
# they empty (with a zero value).
# extraout: Add an extra empty bin on each side.
#
# Example:
# ========
#
# Lets assume you have put all the variables into the shell variable
# AWKVAR. So you can run this script with:
#
# awk $AWKVAR -i /path/to/checks.awk -f /path/to/histogram.awk input.txt
# Do the preparations:
BEGIN {
# Check if all the variables are set:
checkifset(xmax, "xmax")
checkifset(xmin, "xmin")
checkifint(xcol, "xcol")
checkifint(ycol, "ycol")
checkifint(xnumbins, "xnumbins")
checkifset(stdmultip, "stdmultip")
# Set the optional parameters.
if(extraout=="") extraout=0
# Set the required variables (macros)
xwidth=(xmax-xmin)/xnumbins
}
# Go over each line (record) in the input file. If it is commented
# (starts with a #) then ignore it. This histogram finding method uses
# the special feature of AWK arrays: that the indexs have names, they
# are not ordered, they can be added any time and they initiate with
# zero. For each field checked, the appropriate counter is incremented
# by one.
!/^#/ {
# If the value is smaller or larger than the desired region, then
# go to the next record. This is not of interest.
if($xcol < xmin || $xcol > xmax) next
# Find the middle point of the bin point for record.
xmidbin = xmin + int( ($xcol-xmin)/xwidth )*xwidth + xwidth/2
# If $xcol==xmax then the midbins will be larger than the maximum
# value. In that case, we want it to be added to the last bin.
if(xmidbin>xmax) {
if($xcol==xmax) xmidbin-=xwidth
else next
}
# Convert the midbin value to a string, so floating point errors
# don't bother with the counting.
strmidbin=sprintf("%-10.3f", xmidbin)
# Increment the number, add the value to the array.
values[strmidbin][num[strmidbin]] = $ycol
num[strmidbin]++
}
# Print out the results.
END {
# Set the first and last bins.
minix = extraout==0 ? 0 : -1;
maxix = extraout==0 ? xnumbins : xnumbins+1;
for(x=minix; x<maxix; ++x){
# Set the string value of this bin (to read from the array).
strmidbin=sprintf("%-10.3f", xmin+x*xwidth+xwidth/2)
# Only go through the trouble if we have more than two
# elements.
if(num[strmidbin]<=2) continue
# Since there are strong outliers sometimes, we need to
# sigma-clip the data, So first find the initial standard
# deviation.
n=0
M2=0
imean=0
for(i=0;i<num[strmidbin];++i) {
# Standard deviation is found using the Welford algorithm
# given in Wikipedia:
# (https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance).
++n;
print values[strmidbin][i]
delta = values[strmidbin][i] - imean
imean += delta / n
delta2 = values[strmidbin][i] - imean
M2 += delta * delta2
}
istd=M2/(n-1)
printf("%s: %d, %f, %f\n", strmidbin, n, imean, istd);
# Now, find the mean and STD again, but only with values that
# are within a given multiple of the STD.
n=0
M2=0
mean=0
for(i=0;i<num[strmidbin];++i)
if(values[strmidbin][i]>imean-stdmultip*istd \
&& values[strmidbin][i]<imean+stdmultip*istd) {
++n;
delta = values[strmidbin][i] - mean
mean += delta / n
delta2 = values[strmidbin][i] - mean
M2 += delta * delta2
}
printf("%s: %d, %f, %f\n", strmidbin, n, mean, n>2 ? M2/(n-1) : 0);
# Print this value if we have more than two elements:
#if(n>2)
#printf("%-10.3f%-10.3f%-10.3f\n", xmin+x*xwidth+xwidth/2,
# mean, M2/(n-1) )
}
}
......@@ -60,8 +60,8 @@ BEGIN {
checkifset(xmin, "xmin")
checkifset(xmax, "ymax")
checkifset(xmin, "ymin")
checkifset(xnumbins, "xnumbins")
checkifset(xnumbins, "ynumbins")
checkifint(xnumbins, "xnumbins")
checkifint(xnumbins, "ynumbins")
# Set the optional parameters.
if(extraout=="") extraout=0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment