Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 3ed8499

Browse files
committed
Dirty testing script looping over configurations.
1 parent 3e925b7 commit 3ed8499

12 files changed

+248
-15
lines changed

devops/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
inventory_manual/manual
33
logs/
44
res/
5+
tester_res.csv*
6+
wal_lag.txt
57

68
/postgresql.conf.common
79
/postgresql.conf.lord

devops/__init__.py

Whitespace-only changes.

devops/ec2.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
# good for both launch and termination
2828
- import_tasks: tasks/clean_ec2_cache.yml
2929
tags:
30-
- clean_ec2_cache
30+
- always
3131

3232
- name: terminate all ec2 instances
3333
ec2:
@@ -37,6 +37,6 @@
3737
wait: "{{ wait }}"
3838
with_items: "{{ groups.ec2 }}"
3939
vars:
40-
wait: no
40+
wait: yes
4141
tags:
4242
- terminate

devops/inventory_ec2/__init__.py

Whitespace-only changes.

devops/inventory_ec2/ec2_elect_shardlord.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/env python
1+
#!/usr/bin/env python3
22

33
'''
44
Run ec2.py stored in .. directory and append ec2_shardlord, ec2_workers and
@@ -13,7 +13,7 @@
1313
def ec2_elect_shardlord():
1414
script_dir = os.path.dirname(os.path.realpath(__file__))
1515
ec2_path = os.path.join(os.path.dirname(script_dir), "ec2.py")
16-
ec2_inv_txt = subprocess.check_output([ec2_path, "--list"])
16+
ec2_inv_txt = subprocess.check_output([ec2_path, "--list"]).decode('ascii')
1717
ec2_inv = json.loads(ec2_inv_txt)
1818
hosts = list(ec2_inv["_meta"]["hostvars"].keys())
1919
# sort to choose the same shardlord each time
@@ -22,8 +22,7 @@ def ec2_elect_shardlord():
2222
ec2_inv["ec2_shardlord"] = shardlord
2323
ec2_inv["ec2_workers"] = workers
2424
ec2_inv["ec2_init_node"] = init_node
25-
print json.dumps(ec2_inv, sort_keys=True, indent=2)
26-
25+
return ec2_inv
2726

2827
if __name__ == '__main__':
29-
ec2_elect_shardlord()
28+
print(json.dumps(ec2_elect_shardlord(), sort_keys=True, indent=2))

devops/monitor_wal_lag.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env bash
# Periodically sample how far one replica's flushed WAL position is behind the
# current WAL position of this node; one sample per line goes to wal_lag.txt.
set -e

# Pick the first replication connection whose application_name contains "accounts".
app_name=$(psql -qtA -c "select application_name from pg_stat_replication where application_name ~ '.*accounts.*' limit 1;")

# Start every monitoring session from an empty output file.
: > wal_lag.txt

# Runs until the script is killed from outside.
while :; do
    lag=$(psql -qtA -c "select pg_current_wal_lsn() - (select flush_lsn from pg_stat_replication where application_name = '${app_name}');")
    echo "${lag}" >> wal_lag.txt
    sleep 2s
done

devops/pgbench_prepare.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
tags:
7575
- create_hash_partitions_accounts
7676

77-
- name: create_hash_partitions for branches & tellers, nparts {{ nparts }}
77+
- name: create_hash_partitions for branches & tellers, nparts {{ nparts }}, repfactor {{ repfactor }}
7878
command: >
7979
psql -p {{ pg_port }} -c "
8080
select shardman.create_hash_partitions({{ my_id.stdout }}, 'pgbench_tellers', 'tid', {{ nparts }}, {{ rebalance }});

devops/pgbench_run.yml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353

5454
- name: calc sum tps
5555
local_action: >
56-
shell echo "sum: `awk '{ sum += $1 } END { print sum }'
56+
shell echo -e "sum: \n`awk '{ sum += $1 } END { print sum }'
5757
res/{{ test_id }}/tps.txt`" >> res/{{test_id}}/tps.txt
5858
run_once: true
5959

@@ -63,21 +63,28 @@
6363
dest: res/{{ test_id }}/postgresql.conf.worker
6464
flat: yes
6565
run_once: true
66+
tags:
67+
- dump
6668

6769
- name: collect pg_config
6870
shell: pg_config > "pg_config.out"
6971
run_once: true
72+
tags:
73+
- dump
7074

7175
- name: collect pg_config
7276
fetch:
7377
src: "pg_config.out"
7478
dest: res/{{ test_id }}/pg_config.out
7579
flat: yes
7680
run_once: true
81+
tags:
82+
- dump
7783

7884
- import_playbook: logs.yml logs=res/{{ test_id }}/logs
7985
tags:
8086
- logs
87+
- dump
8188

8289
- hosts: shardlord
8390
tasks:
@@ -87,15 +94,19 @@
8794
dest: res/{{ test_id }}/postgresql.conf.lord
8895
flat: yes
8996
run_once: true
97+
tags:
98+
- dump
9099

91100
- hosts: localhost
92101
tasks:
93102
- name: Dump all vars
94103
action: template src=templates/dumpallvars.j2 dest=res/{{ test_id }}/allvars.txt
104+
tags:
105+
- dump
95106

96107
- name: print tps
97108
local_action: shell cat res/{{ test_id }}/tps.txt
98109
register: tps
99110
run_once: true
100111

101-
- debug: msg="{{ tps.stdout_lines }}. test_id {{ test_id }} "
112+
- debug: msg="{{ tps.stdout_lines }}. test_id is {{ test_id }}"

devops/postgresql.conf.common.example

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,11 @@ fsync = on
3030
autovacuum = off
3131
synchronous_commit = on
3232
# synchronous_commit = local
33+
shardman.sync_replicas = on
3334

3435
shardman.shardlord_dbname = ubuntu
3536
shardman.poll_interval = 500 # long operations poll frequency in milliseconds
3637

37-
shardman.sync_replicas = on
38-
3938
max_prepared_transactions = 100000
4039
postgres_fdw.use_2pc = on
4140
postgres_fdw.use_repeatable_read = off

devops/provision.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,9 @@
152152
when: tags_not_specified is not defined
153153
tags: ars
154154

155-
- name: set PATH in .bashrc
156-
lineinfile: dest=~/.bashrc line="export PATH={{ pg_dst }}/bin:$PATH"
155+
- name: set PATH to pg binaries in .bashrc.
156+
# It is important to insert at the beginning, otherwise non-interactive shells won't read this.
157+
lineinfile: dest=~/.bashrc insertbefore=BOF line="export PATH={{ pg_dst }}/bin:$PATH"
157158
tags:
158159
- set_path
159160
- ars
@@ -194,7 +195,8 @@
194195
tags: perf
195196

196197
- name: set PATH to flamegraph in .bashrc
197-
lineinfile: dest=~/.bashrc line="export PATH={{ fg_path }}:$PATH"
198+
# It is important to insert at the beginning, otherwise non-interactive shells won't read this.
199+
lineinfile: dest=~/.bashrc insertbefore=BOF line="export PATH={{ fg_path }}:$PATH"
198200
tags:
199201
- perf
200202

devops/tester.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
#!/usr/bin/env python3
2+
3+
import time
4+
import re
5+
import csv
6+
import os
7+
import shutil
8+
from subprocess import check_call, check_output
9+
10+
from inventory_ec2.ec2_elect_shardlord import ec2_elect_shardlord
11+
12+
# ---- benchmark matrix --------------------------------------------------
# `scale` is handed to pgbench_prepare.yml, `duration` to pgbench as -T.
scale = 10
duration = 30

# Worker counts to test; tester() launches n + 1 instances for each value.
workers = [3, 6, 9]
# All known modes: ['none', 'logical_sync', 'logical_async', 'trigger']
replications = ['logical_async']
# Settings appended to postgresql.conf.common for the 'logical_async' mode.
async_config = (
    "\n"
    "synchronous_commit = local\n"
    "shardman.sync_replicas = off\n"
)
# Partitions per worker; multiplied by the worker count to get nparts.
nparts_per_node = [3, 10]
# pgbench client counts (-c) to run for every prepared configuration.
clients = [1, 16, 32, 64, 128]

# Minimal matrix for debugging -- uncomment to override the lists above:
# workers = [3]
# replications = ['logical_async']
# nparts_per_node = [3]
# clients = [8]

# ---- stage switches ----------------------------------------------------
# Each flag enables one stage of the pipeline.
create_instances = True
destroy_instances = True
provision = True
rebuild_shardman = True
prepare = True

# ---- results -----------------------------------------------------------
resfile_path = "tester_res.csv"
# csv.writer over resfile_path; set by tester() before any run.
resfile_writer = None
39+
40+
class TestRow:
    """Mutable record describing one benchmark run and its measured results.

    The nested test loops fill the fields in step by step; the finished
    record is turned into a CSV line by form_csv_row().
    """

    def __init__(self):
        # Chained assignment binds targets left to right, which keeps the
        # attribute order of __dict__ stable (it is printed in progress logs).
        self.test_id = self.workers = self.replication = None
        self.nparts_per_node = self.clients = self.tps_sum = None
        # Stays empty unless WAL lag was monitored for the run.
        self.wal_lag = ''
49+
50+
def res_header():
    """Return the column titles of the results CSV as a fresh list.

    The order matches the values produced by form_csv_row().
    """
    columns = (
        "test_id",
        "instance_type",
        "num of workers",
        "nparts",
        "replicas",
        "repmode",
        "sync_replicas",
        "sync_commit",
        "CFLAGS",
        "scale",
        "seconds",
        "test",
        "fdw 2pc",
        "active_workers",
        "clients",
        "tps sum",
        "avg latency",
        "end latency",
        "wal lag",
    )
    return list(columns)
55+
56+
def tester():
    """Run the whole benchmark matrix and record results in resfile_path.

    An existing results file is first copied to "<resfile_path>.old". For
    every entry of `workers`, EC2 instances are (optionally) launched, the
    nested test loops are run via test_nodes(), and the instances are
    (optionally) terminated again -- even when the tests raised.

    Side effects: sets the module-level `resfile_writer`, which run() uses
    to append one CSV row per benchmark run.

    Raises:
        subprocess.CalledProcessError: if any ansible-playbook call fails.
    """
    global resfile_writer

    terminate_cmd = 'ansible-playbook -i inventory_ec2/ ec2.yml --tags "terminate"'

    if os.path.isfile(resfile_path):
        shutil.copy(resfile_path, "{}.old".format(resfile_path))

    # buffering=1 is line buffering, so every finished row reaches the disk
    # right away; the `with` guarantees the file is closed however we leave.
    with open(resfile_path, 'w', newline='', buffering=1) as resfile:
        resfile_writer = csv.writer(resfile)
        resfile_writer.writerow(res_header())

        # Start from a clean slate: drop instances left over from earlier runs.
        if create_instances:
            check_call(terminate_cmd, shell=True)

        for n in workers:
            try:
                if create_instances:
                    # n + 1 instances are requested for n workers.
                    check_call('ansible-playbook -i inventory_ec2/ ec2.yml --tags "c3.2xlarge" -e "count={}"'.format(n + 1),
                               shell=True)
                    time.sleep(20)  # wait for system to start up
                test_row = TestRow()
                test_row.workers = n
                test_nodes(test_row)
            finally:
                # Never leave paid instances running after a failure.
                if destroy_instances:
                    check_call(terminate_cmd, shell=True)
79+
80+
def test_nodes(test_row):
    """Provision the current set of nodes, then test each replication mode.

    When `provision` is set, provision.yml is played twice: first limited
    to the "ars" tag, then in full. Afterwards test_repl() is called once
    per entry of `replications`, with the mode stored on `test_row`.
    """
    if provision:
        provision_cmd = '''
ansible-playbook -i inventory_ec2/ provision.yml --tags "ars" && \
ansible-playbook -i inventory_ec2/ provision.yml
'''
        check_call(provision_cmd, shell=True)

    for replication_mode in replications:
        test_row.replication = replication_mode
        test_repl(test_row)
90+
91+
def test_repl(test_row):
    """Set up shardman and the common config for one replication mode.

    When `rebuild_shardman` is set, shardman is rebuilt through
    provision.yml (tag "build_shardman"); the 'trigger' mode builds the
    "tbr-2pc" version tag, all other modes use the playbook's default.
    postgresql.conf.common is then recreated from the example file and,
    for 'logical_async', extended with `async_config`. Finally
    test_nparts() is called for every entry of `nparts_per_node`.

    Raises:
        subprocess.CalledProcessError: if the build playbook fails.
        OSError: if the config file cannot be copied or written.
    """
    if rebuild_shardman:
        if test_row.replication == 'trigger':
            print("Building tbr shardman")
            extra_vars = '-e "shardman_version_tag=tbr-2pc" '
        else:
            print("Building master shardman")
            extra_vars = ''
        check_call(
            'ansible-playbook -i inventory_ec2/ provision.yml '
            '{}--tags "build_shardman"'.format(extra_vars),
            shell=True)

    # Always start from the pristine example so that settings appended for
    # a previous replication mode do not leak into this one.
    shutil.copyfile('postgresql.conf.common.example', 'postgresql.conf.common')
    if test_row.replication == 'logical_async':
        with open("postgresql.conf.common", "a") as pgconf:
            pgconf.write(async_config)

    for nparts in nparts_per_node:
        test_row.nparts_per_node = nparts
        test_nparts(test_row)
114+
115+
def test_nparts(test_row):
    """Prepare pgbench data for the current layout, then run every client count.

    When `prepare` is set, pgbench_prepare.yml is played with the total
    number of partitions (workers * nparts_per_node) and a replication
    factor of 0 for the 'none' mode and 1 otherwise. run() is then called
    once per entry of `clients`.
    """
    if prepare:
        print("Preparing data for {}".format(test_row.__dict__))
        total_parts = test_row.workers * test_row.nparts_per_node
        repfactor = 1 if test_row.replication != 'none' else 0
        prepare_cmd = '''
ansible-playbook -i inventory_ec2/ pgbench_prepare.yml \
-e "scale={} nparts={} repfactor={} rebalance=true tellers_branches=false"
'''.format(scale, total_parts, repfactor)
        check_call(prepare_cmd, shell=True)

    for client_count in clients:
        test_row.clients = client_count
        run(test_row)
127+
128+
def run(test_row):
    """Execute one pgbench run and append its result row to the results CSV.

    For the 'logical_async' mode, monitor_wal_lag.sh is copied to the first
    worker of the EC2 inventory and started in the background before the
    benchmark; afterwards it is stopped, wal_lag.txt is fetched, and the
    maximum sampled lag is stored (pretty-printed) in test_row.wal_lag.

    The test id is parsed from the playbook output ("test_id is <id>") and
    the tps sum is read from the last line of res/<id>/tps.txt.

    Raises:
        subprocess.CalledProcessError: if any external command fails.
        RuntimeError: if the playbook output contains no test id.
    """
    print("Running {}".format(test_row.__dict__))

    # monitor wal lag if we test async logical replication
    monitor_lag = test_row.replication == 'logical_async'
    mon_node = None
    if monitor_lag:
        inventory = ec2_elect_shardlord()
        mon_node = "ubuntu@{}".format(inventory['ec2_workers'][0])
        print("mon_node is {}".format(mon_node))
        check_call('scp monitor_wal_lag.sh {}:'.format(mon_node), shell=True)
        check_call(
            '''
ssh {} 'nohup /home/ubuntu/monitor_wal_lag.sh > monitor_wal_lag.out 2>&1 &'
'''.format(mon_node), shell=True)

    try:
        # errors="replace": a stray non-ASCII byte in the playbook output
        # must not abort a run that otherwise succeeded.
        run_output = check_output(
            '''
ansible-playbook -i inventory_ec2/ pgbench_run.yml -e \
'tmstmp=true tname=t pgbench_opts="-N -c {} -T {}"'
'''.format(test_row.clients, duration), shell=True).decode("utf-8", errors="replace")
    finally:
        # Stop the monitor even when the benchmark failed, so that it does
        # not keep running on the remote node.
        if monitor_lag:
            check_call('ssh {} pkill -f -SIGTERM monitor_wal_lag.sh'.format(mon_node), shell=True)

    # pull the wal lag samples and keep the largest one
    if monitor_lag:
        check_call('scp {}:wal_lag.txt .'.format(mon_node), shell=True)
        max_lag_bytes = int(check_output(
            "awk -v max=0 '{if ($1 > max) {max=$1} }END {print max}' wal_lag.txt",
            shell=True))
        test_row.wal_lag = size_pretty(max_lag_bytes)

    print('Here is run output:')
    print(run_output)
    test_id_match = re.search(r'test_id is ([\w-]+)', run_output)
    if test_id_match is None:
        raise RuntimeError("no 'test_id is <id>' line found in pgbench_run.yml output")
    test_id = test_id_match.group(1)
    print('test id is {}'.format(test_id))
    test_row.test_id = test_id

    # The last line of tps.txt holds the sum over all nodes; it may be
    # fractional, hence int(float(...)).
    tps_sum = int(float(check_output('tail -1 res/{}/tps.txt | xargs echo -n'.format(test_id),
                                     shell=True).decode("ascii")))
    print('tps sum is {}'.format(tps_sum))
    test_row.tps_sum = tps_sum
    resfile_writer.writerow(form_csv_row(test_row))
169+
170+
171+
def form_csv_row(test_row):
    """Convert a filled-in TestRow into a list of CSV values.

    The values are ordered like the titles returned by res_header().
    Replica count, replication mode, sync_replicas and sync_commit are all
    derived from test_row.replication; several columns are fixed constants
    and the two latency columns are left empty.
    """
    mode = test_row.replication

    replicas = 0 if mode == 'none' else 1

    if mode.startswith('logical'):
        repmode = 'logical'
    elif mode == 'trigger':
        repmode = 'trigger'
    else:
        repmode = ''

    # Only the logical modes have a sync_replicas setting to report.
    sync_replicas = {'logical_sync': 'on', 'logical_async': 'off'}.get(mode, '')
    sync_commit = 'local' if mode == 'logical_async' else 'on'

    total_parts = test_row.workers * test_row.nparts_per_node
    return [
        test_row.test_id,
        'c3.2xlarge',
        test_row.workers,
        total_parts,
        replicas,
        repmode,
        sync_replicas,
        sync_commit,
        "-O2",
        scale,
        duration,
        "pgbench -N",
        "on",
        test_row.workers,
        test_row.clients,
        test_row.tps_sum,
        '',
        '',
        test_row.wal_lag,
    ]
196+
197+
def size_pretty(size, precision=2):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size: number of bytes (int or float).
        precision: number of digits after the decimal point.

    Returns:
        A string such as "512.00B", "1.50KB" or "3.00GB". Values of 1024 TB
        and above stay in TB, the largest supported unit.
    """
    suffixes = ('B', 'KB', 'MB', 'GB', 'TB')
    index = 0
    # ">=" so that exactly 1024 of a unit rolls over to the next one
    # (1024 bytes is "1.00KB", not "1024.00B").
    while size >= 1024 and index < len(suffixes) - 1:
        index += 1
        size = size / 1024.0
    return "%.*f%s" % (precision, size, suffixes[index])
204+
205+
# Run the full benchmark matrix only when executed as a script.
if __name__ == "__main__":
    tester()

postgresql.conf.common

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,6 @@ effective_cache_size = 512MB
5252
work_mem = 4MB
5353
max_connections = 1000
5454
max_wal_size = 5GB
55+
56+
postgres_fdw.use_2pc = on
57+
postgres_fdw.use_repeatable_read = off

0 commit comments

Comments
 (0)