Related
I do not want to change my data files that come with the first column containing the time values. Then I formatted it on gnuplot to show only the hour and minute. But it is a bit ugly to start the time from 8:00. I would like to start it from 0 and keep the values at the same pace of the data file. I was trying to use a constant like this example shows How do I make a plot in gnuplot with the lowest value automatically subtracted from the y data? but it is not working.
Here are my source and the plot.
#!/usr/bin/gnuplot
# set grid
set key outside bottom center horizontal
set key font ",19"
set style line 1 lc rgb '#E02F44' lt 1 lw 1 ps 0.5 pt 7 # input throughput
set style line 2 lc rgb '#FF780A' lt 1 lw 1 ps 0.5 pt 1 # output throughput
set style line 3 lc rgb '#56A64B' lt 1 lw 1 ps 0.5 pt 2 # average processing latency
set style line 4 lc rgb '#000000' lt 1 lw 1 ps 0.5 pt 3 # 99th percentile processing latency
set style arrow 1 heads ls 4
set style arrow 2 head ls 4
set terminal pdf
set pointintervalbox 0
set datafile separator ','
set output "Cost-20K-ThroughputVsLatency.pdf"
#set title ""
set xlabel "time (minutes)" font ",17" offset 0,1,0
set xtics font ",8" offset 0,0.5,0
set xdata time # tells gnuplot the x axis is time data
set timefmt "%Y-%m-%d %H:%M:%S" # specify our time string format
set format x "%H:%M" # otherwise it will show only MM:SS
set xrange ["2020-05-07 08:05:00":"2020-05-07 09:50:00"]
set ylabel "Throughput (K rec/sec)" font ",18" offset 0,0,0
set yrange [0:7]
set ytics font ",20"
#set y2label "processing latency (seconds)" font ",18" offset -1.5,0,0
set y2range [0:25]
set ytics nomirror
set y2tics 0, 5 font ",17"
plot "throughput-vs-latency-20K.csv" using 1:(column(2)/1000) title "IN throughput" with linespoints ls 1 axis x1y1 \
, "throughput-vs-latency-20K.csv" using 1:(column(10)/1000) title "OUT throughput" with linespoints ls 2 axis x1y1 \
, "throughput-vs-latency-20K.csv" using 1:(column(18)/1000) title "avg. latency" with linespoints ls 3 axis x1y2 \
, "throughput-vs-latency-20K.csv" using 1:(column(26)/1000) title "99th latency" with linespoints ls 4 axis x1y2
UPDATE
I changed my script like you said #theozh but I am still not getting the x axis starting from 0.
set key bottom right
set key font ",11"
set style line 1 lc rgb '#E02F44' lt 1 lw 1 ps 0.5 pt 7 # input throughput
set style line 2 lc rgb '#FF780A' lt 1 lw 1 ps 0.5 pt 1 # output throughput
set style line 3 lc rgb '#56A64B' lt 1 lw 1 ps 0.5 pt 2 # average processing latency
set style line 4 lc rgb '#000000' lt 1 lw 1 ps 0.5 pt 3 # 99th percentile processing latency
set style arrow 1 heads ls 4
set term pdfcairo size 5.0in,2.5in
set pointintervalbox 0
set datafile separator ','
set tmargin 1.5
set border 1+2+8
set xtics nomirror
set output "throughput-latency-increasingK-TaxiRideNYC-50Kpersec.pdf"
myTimeFmt = "%Y-%m-%d %H:%M:%S"
set xlabel "time (minutes)" font ",9" offset 0,1.5,0
set xtics font ",8" #rotate by 45 right
set ylabel "Throughput (K rec/sec)" font ",10" offset 2,0,0
set yrange [0:3.5]
set y2label "processing latency (seconds)" font ",10" offset -2,0,0
set y2range [0:14]
set ytics nomirror
set y2tics 0, 2
set xdata time # tells gnuplot the x axis is time data
set format x "%M" time
plot t=0 "throughput-latency-increasing.csv" u (t==0?(t0=timecolumn(1,myTimeFmt),t=1):NaN, timecolumn(1,myTimeFmt)-t0):(column(2)/1000) title "IN throughput" with linespoints ls 1 axis x1y1 \
, t=0 "throughput-latency-increasing.csv" u (t==0?(t0=timecolumn(1,myTimeFmt),t=1):NaN, timecolumn(1,myTimeFmt)-t0):(column(18)/1000) title "avg. latency" with linespoints ls 3 axis x1y2 \
, 4/0 t "# of tuples pre-aggregating" with vectors arrowstyle 1
values are here:
"Time","pre_aggregate-outPool[0]-avg","pre_aggregate-outPool[1]-avg","pre_aggregate-outPool[2]-avg","pre_aggregate-outPool[3]-avg","pre_aggregate-outPool[4]-avg","pre_aggregate-outPool[5]-avg","pre_aggregate-outPool[6]-avg","pre_aggregate-outPool[7]-avg","pre_aggregate-outPool[0]-99","pre_aggregate-outPool[1]-99","pre_aggregate-outPool[2]-99","pre_aggregate-outPool[3]-99","pre_aggregate-outPool[4]-99","pre_aggregate-outPool[5]-99","pre_aggregate-outPool[6]-99","pre_aggregate-outPool[7]-99","pre_aggregate[0]-param","pre_aggregate[1]-param","pre_aggregate[2]-param","pre_aggregate[3]-param","pre_aggregate[4]-param","pre_aggregate[5]-param","pre_aggregate[6]-param","pre_aggregate[7]-param"
"2020-04-27 10:22:45",33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33,70,75,79,33,41,62,75,50000,50000,50000,50000,50000,50000,50000,50000
"2020-04-27 10:23:00",33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33,33,75,79,33,33,33,37,50000,50000,50000,50000,50000,50000,50000,50000
"2020-04-27 10:23:15",33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33,33,33,33,33,33,33,33,50000,50000,50000,50000,50000,50000,50000,50000
"2020-04-27 10:23:30",33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,62,66,50,62,66,45,50,66,50000,50000,50000,50000,50000,50000,50000,50000
"2020-04-27 10:23:45",33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,62,66,50,62,66,45,50,66,50000,50000,50000,50000,50000,50000,50000,50000
"2020-04-27 10:24:00",33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33.33333432674408,33,33,33,33,33,33,33,33,50000,50000,50000,50000,50000,50000,50000,50000
The following example uses the newer gnuplot date time syntax (see help timecolumn), e.g. timecolumn(1,myTimeFmt) and set format x "%H:%M" time.
In order to normalize your time series to the first data point you have to store this time into a variable, e.g. t0 which you can "re-use" in successive plot commands from the same datafile.
Note the different time format for the x axis: "%H:%M" for day time and "%tH:%tM" for hours exceeding 24 hours or minutes exceeding 60 minutes, see help time_specifiers.
Edit:
for better readability of the plot command, I "outsourced" the normalization into a function Normalize(). But note that t=0 is still required at the beginning of the plot command.
in case you have some (uncommented) header lines, you need to skip them via skip <number of header lines>.
Code:
### normalize time data relative to start time
reset session
myTimeFmt = "%Y-%m-%d %H:%M:%S"
# create some test data
set table $Data
plot '+' u (strftime(myTimeFmt,time(0) + $1*3600*2)):(cos($1)) w table
unset table
# function to normalize time column to first value
Normalize(c) = (t==0?(t0=timecolumn(c,myTimeFmt),t=1):NaN, timecolumn(c,myTimeFmt)-t0)
# in case there are uncommented header lines skip them
SkipHeaderLines = 0
set multiplot layout 2,1
set format x "%Y\n%m-%d\n%H:%M" time
plot $Data u (timecolumn(1,myTimeFmt)):3 skip SkipHeaderLines w l ti "absolute time"
set format x "%tH:%tM" time
plot t=0 $Data u (Normalize(1)):3 skip SkipHeaderLines w l ti "relative time"
unset multiplot
### end of code
Result:
I have this script to plot data from a CSV file using gnuplot. I want to add 3 vertical lines at different times on the plot to show where I changed the workload of my experiment. I was trying to do it with vector but it was messing the data already plotted. I attached my chart and added manually the vertical blue line as an example of what I want.
#!/usr/bin/gnuplot
# set grid
set key under left maxrows 1
set style line 1 lc rgb '#E02F44' lt 1 lw 1 ps 0.5 pt 7 # input throughput
set style line 2 lc rgb '#FF780A' lt 1 lw 1 ps 0.5 pt 1 # output throughput
set style line 3 lc rgb '#56A64B' lt 1 lw 1 ps 0.5 pt 2 # average processing latency
set style line 4 lc rgb '#000000' lt 1 lw 1 ps 0.5 pt 3 # 99th percentile processing latency
set terminal pdf
set pointintervalbox 0
set datafile separator ','
set output "efficiency-throughput-networkbuffer-baseline-TaxiRideNYC-100Kpersec.pdf"
set title "Throughput vs. processing latency consuming 50K r/s from the New York City (TLC)"
set xlabel "time (minutes)"
set ylabel "Throughput (K rec/sec)"
set y2label "processing latency (seconds)"
set ytics nomirror
set y2tics 0, 1
set xdata time # tells gnuplot the x axis is time data
set timefmt "%Y-%m-%d %H:%M:%S" # specify our time string format
set format x "%M" # otherwise it will show only MM:SS
plot "throughput-latency-increasing.csv" using 1:(column(2)/1000) title "IN throughput" with linespoints ls 1 axis x1y1 \
, "throughput-latency-increasing.csv" using 1:(column(10)/1000) title "OUT throughput" with linespoints ls 2 axis x1y1 \
, "throughput-latency-increasing.csv" using 1:(column(18)/1000) title "avg. latency" with linespoints ls 3 axis x1y2 \
, "throughput-latency-increasing.csv" using 1:(column(26)/1000) title "99th perc. latency" with linespoints ls 4 axis x1y2 \
#, "" using 1:($1):(3):(0) notitle with vectors nohead
My data file is:
"Time","pre_aggregate[0]-IN","pre_aggregate[1]-IN","pre_aggregate[2]-IN","pre_aggregate[3]-IN","pre_aggregate[4]-IN","pre_aggregate[5]-IN","pre_aggregate[6]-IN","pre_aggregate[7]-IN","pre_aggregate[0]-OUT","pre_aggregate[1]-OUT","pre_aggregate[2]-OUT","pre_aggregate[3]-OUT","pre_aggregate[4]-OUT","pre_aggregate[5]-OUT","pre_aggregate[6]-OUT","pre_aggregate[7]-OUT","pre_aggregate[0]-50","pre_aggregate[1]-50","pre_aggregate[2]-50","pre_aggregate[3]-50","pre_aggregate[4]-50","pre_aggregate[5]-50","pre_aggregate[6]-50","pre_aggregate[7]-50","pre_aggregate[0]-99","pre_aggregate[1]-99","pre_aggregate[2]-99","pre_aggregate[3]-99","pre_aggregate[4]-99","pre_aggregate[5]-99","pre_aggregate[6]-99","pre_aggregate[7]-99"
"2020-04-27 10:31:00",1428.05,1274.4666666666667,1364.6166666666666,1384.4666666666667,1327.3,1376.5,1390.9166666666667,1418.35,1428.05,1274.4666666666667,1364.6333333333334,1384.4666666666667,1327.3,1376.5,1390.9166666666667,1418.35,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1
"2020-04-27 10:31:15",1463.5833333333333,1452.3666666666666,1346.7333333333333,1380.3833333333334,1429.4833333333333,1431.6833333333334,1442.85,1425.15,1463.5833333333333,1452.3666666666666,1346.7333333333333,1380.3833333333334,1429.4833333333333,1431.6833333333334,1442.85,1425.15,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1
"2020-04-27 10:31:30",1393.4666666666667,1396.65,1369.55,1381.3833333333334,1336.8,1434.5166666666667,1440.0833333333333,1399.2833333333333,1393.45,1396.65,1369.55,1381.3833333333334,1336.8,1434.5166666666667,1440.0833333333333,1399.2833333333333,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1
"2020-04-27 10:31:45",1404.8833333333334,1448.5333333333333,1313.9,1308.1,1359.6333333333334,1329.5166666666667,1338.4166666666667,1481.5666666666666,1404.8833333333334,1448.5333333333333,1313.9,1308.1,1359.6333333333334,1329.5166666666667,1338.4166666666667,1481.5833333333333,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1
Of course you can plot your lines and labels. In the example below I'm using the newer syntax compared to set xdata time. Which requires timecolumn(1,myTimeFmt) and e.g. set format x "%M" time.
Your date is in double quotes, so you have to define the timeformat using single quotes including the double quotes.
Furthermore, you are using absolute times, so your lines ideally use the same format. You can put it into a datablock. I hope you can adapt the code to your needs.
Code:
### vertical lines with labels on time axis
reset session
$myLines <<EOD
"2020-04-27 10:34:00"
"2020-04-27 10:39:20"
"2020-04-27 10:43:50"
"2020-04-27 10:48:00"
EOD
myTimeFmt = '"%Y-%m-%d %H:%M:%S"'
StartDate = '"2020-04-27 10:30:00"'
EndDate = '"2020-04-27 10:52:00"'
set format x "%M" time
set xrange [strptime(myTimeFmt,StartDate):strptime(myTimeFmt,EndDate)]
yLow = 1.4
yHigh = 3.5
set tmargin screen 0.90
plot '+' u (strptime(myTimeFmt,StartDate)+$0*60):(rand(0)*3+0.5) w l lc rgb "red" notitle, \
$myLines u (timecolumn(1,myTimeFmt)):(yHigh):("Workload\nchanged") w labels right offset -0.5,1.5 not, \
$myLines u (timecolumn(1,myTimeFmt)):(yLow):(0):(yHigh-yLow) w vec lc rgb "blue" lw 2 nohead not
### end of code
Result:
I have been working with gnuplot for a while now. Recently started using it to create reports to send to customers. I tried to experiment with my plotting script but not much has come of it.
My question simply is, how to make charts from Gnuplot look really awesome? Think of D3/chartjs output for comparison.
I understand 'awesome' is subjective. But general pointers in this regard would help a lot!
Look for color palettes that you like online. One nice source is this.
Experiment with border styles, grid styles et cetera.
Once you are happy with it, put the style items in your gnuplotrc.
This is my gnuplotrc:
set encoding utf8
# See https://github.com/Gnuplotting/gnuplot-palettes
# Line styles (colorbrewer Set1)
set style line 1 lc rgb '#E41A1C' pt 1 ps 1 lt 1 lw 2 # red
set style line 2 lc rgb '#377EB8' pt 6 ps 1 lt 1 lw 2 # blue
set style line 3 lc rgb '#4DAF4A' pt 2 ps 1 lt 1 lw 2 # green
set style line 4 lc rgb '#984EA3' pt 3 ps 1 lt 1 lw 2 # purple
set style line 5 lc rgb '#FF7F00' pt 4 ps 1 lt 1 lw 2 # orange
set style line 6 lc rgb '#FFFF33' pt 5 ps 1 lt 1 lw 2 # yellow
set style line 7 lc rgb '#A65628' pt 7 ps 1 lt 1 lw 2 # brown
set style line 8 lc rgb '#F781BF' pt 8 ps 1 lt 1 lw 2 # pink
# Palette
set palette maxcolors 8
set palette defined ( 0 '#E41A1C', 1 '#377EB8', 2 '#4DAF4A', 3 '#984EA3',\
4 '#FF7F00', 5 '#FFFF33', 6 '#A65628', 7 '#F781BF' )
# Standard border
set style line 11 lc rgb '#808080' lt 1 lw 3
set border 0 back ls 11
set tics out nomirror
# Standard grid
set style line 12 lc rgb '#808080' lt 0 lw 1
set grid back ls 12
unset grid
And this is my template for gnuplot files:
set terminal pdfcairo enhanced color dashed font "Alegreya, 14" \
rounded size 16 cm, 9.6 cm
# Default encoding, line styles, pallette, border and grid are set in
# /usr/local/share/gnuplot/x.y/gnuplotrc.
set xlabel "x"
set ylabel "f(x)"
set grid
set key right top
set xrange[0:6.28]
set yrange[-1:1]
set output 'sinx.pdf'
plot sin(x) w l ls 1, cos(x) w l ls 2
(You should change the font to one that you have available.)
It looks like this:
Is this awesome? Well I like to think so! But in all seriousness, there are reasons for this layout.
I want the data to be paramount. That is why the border is not empasized and there is no background color.
Nevertheless, I want the viewer to see where the graph crosses the values shown on the labels, hence the visible tickmarks and (unobtrusive) grid.
To make the graphs "fit" into the style of the document I tend to do a couple of things:
Use the same font as the body text of my documents.
Use the same size, line style, borders et cetera for all the graphs in the document.
Match the graph size to the width of the column or text block.
I want to set the background of data labels to white! The considered plot is a data plot of the following data (gnuDC.dat):
4 1570.96 1571
8 770.63 771
12 530.33 530
16 385.13 385
24 261.87 262
48 137.71 138
96 81.42 81
The plot command reads:
plot "gnuDC.dat" using 1:2 title "DC: GNU Fortran 4.7.2 + Open MPI 1.6.3" w p ls 1, \
"gnuDC.dat" using 1:2:3 with labels center offset 2.,0.7 font "Helvetica,14" tc ls 4 notitle, \
"gnuDC.dat" using 1:3 notitle smooth csplines ls 14
Which gives me:
It looks ok but think one could read the lables better when the would have an white background. Is there an easy way to add the white background for all labels at once?
Here is the whole print file:
set terminal postscript eps size 14cm,10cm enhanced color \
font 'Helvetica,18' linewidth 2
set output 'test.eps'
# Line style for axes
set style line 80 lt 0
set style line 80 lt rgb "#808080"
# Line style for grid
set style line 81 lt 3 # dashed
set style line 81 lt rgb "#808080" lw 0.5 # grey
set grid back linestyle 81
set border 3 back linestyle 80
set xtics nomirror
set ytics nomirror
set style line 100 lc rgb '#0060ad' lt 1 lw 2 pt 7 ps 1.5
set style line 200 lc rgb '#a2142f' lt 1 lw 2 pt 7 ps 1.5
set pointintervalbox 0
set style line 1 lc rgb '#0072bd' lt 1 lw 1 pt 9 pi -10 ps 2
set style line 2 lc rgb '#77ac30' lt 1 lw 1 pt 7 pi -10 ps 2
set style line 3 lc rgb '#d95319' lt 1 lw 1 pt 1 pi -10 ps 2
set bmargin 4
set lmargin 5
set rmargin 4
unset title
set size 1,1
#set origin 0,0.27
set xlabel "number of cores, -"
set ylabel "Computational time, s"
set key top right
set key spacing 1.5
set key width -12
set yrange [0:1710]
plot "gnuDC.dat" using 1:2 title "DC: GNU Fortran 4.7.2 + Open MPI 1.6.3" w p ls 1, \
"gnuDC.dat" using 1:2:3 with labels center offset 2.,0.7 font "Helvetica,14" tc ls 4 notitle, \
"gnuDC.dat" using 1:3 notitle smooth csplines ls 14
With gnuplot version 5 there is a boxed option which does exactly this: give labels a background and, if you want, also a border. The style is controlled with set style textbox, e.g.
set style textbox opaque noborder
plot ... with labels boxed ...
Applied to your script (with some minor changes due to the changed dash handling since 5.0):
# Line style for axes
set style line 80 lt rgb "#808080"
# Line style for grid
set style line 81 dt 3 # dashed
set style line 81 lt rgb "#808080" lw 0.5 # grey
set grid back linestyle 81
set border 3 back linestyle 80
set tics nomirror
set linetype 1 lc rgb '#0072bd' pt 9 pi -10 ps 2 dt 3
set bmargin 4
set lmargin 5
set rmargin 4
set xlabel "number of cores, -"
set ylabel "Computational time, s"
set key top right
set key spacing 1.5
set key width -12
set yrange [0:1710]
set style textbox opaque noborder
plot "gnuDC.dat" using 1:2 title "DC: GNU Fortran 4.7.2 + Open MPI 1.6.3" w p lt 1, \
"gnuDC.dat" using 1:2:3 with labels boxed center offset 2.,0.7 font "Helvetica,10" tc ls 1 notitle, \
"gnuDC.dat" using 1:3 notitle smooth csplines lt 1
No, for versions 4.6 and earlier there isn't an easy way to achieve this.
I am trying to plot a graph where the x-values fall in between xtics.
For example, I want my xtics to be
C72 C73 C74 C75 C76 C77 C78 C79 C80 C81
and the points fall in between C72 C73 ; C73 C74 ; C74 C75 ; and so on.
My data points are
> 2.5 0.17509 C72
> 3.5 0.220434 C73
> 4.5 0.164918 C74
> 5.5 0.172477 C75
> 6.5 0.156145 C76
> 7.5 0.171699 C77
> 8.5 0.165199 C78
> 9.5 0.191207 C79
> 10.5 0.211656 C80
> 11.5 0.202233 C81
I used xticlabels() in the script definitions as below:
#OUTPUT
set terminal pngcairo size 650,450 enhanced dash
set output "xplot_gauche_malto-thermo.png"
set style line 4 lt 4 lw 10 # Please DISABLE pause -1
#MICRO
set macro
labelFONT="font 'arial,22'"
scaleFONT="font 'arial,18'"
scaleFONT2="font 'arial,18'"
keyFONT="font 'arial,18'"
# AXIS DEFINITIONS
set xrange [0:12]
set yrange [0:0.8]
set xtic (2,3,4,5,6,7,8,9,10,11) #scaleFONT2
set ytic #scaleFONT
set boxwidth 0.8
set size square
#PLOT
plot "all_dihedrals_in_layers_malto.dat" using 1:2:xticlabels(3) with linespoints lw 2 linecolor rgb "black" pointtype 1 pointsize 2 title ""
If I use the code as above, to get a plot using only column 1 and 2 from data file (as given above) I get the points fall in between 2-3, 3-4, 4-5 and so on.
Unfortunately if I use "xticlabels()", I don't get the graph as I wanted where the point supposed to fall in between C72-C73, C73-C74, C74-C75 and so on.
Appreciate in advance for any help.
Thanks
try something like this.. (Untested i dont have gnuplot on this machine..)
plot "all_dihedrals_in_layers_malto.dat" using 1:2 with linespoints \
lw 2 linecolor rgb "black" pointtype 1 pointsize 2 title "" ,\
"all_dihedrals_in_layers_malto.dat" using ($1-.5):0/0:xticlabels(3)
of course you could alternately manually key in the labels on the set xtics line..
Edit..had a chance toi try it, the 0/0 or (0/0) does not work. What you need to do is plot some value out of range.. eg:
set yrange [0:]
plot "all_dihedrals_in_layers_malto.dat" using 1:2 with linespoints \
lw 2 linecolor rgb "black" pointtype 1 pointsize 2 title "" ,\
"all_dihedrals_in_layers_malto.dat" using ($1-.5):-1:xticlabels(3) notitle