bundles/zfs: add per-dataset metrics

This commit is contained in:
Franzi 2021-06-25 20:04:30 +02:00
parent 9cc324f84c
commit 1c10be5cdc
Signed by: kunsi
GPG key ID: 12E3D2136B818350
5 changed files with 792 additions and 1 deletions

View file

@ -0,0 +1,724 @@
def dashboard_row_zfs(panel_id, node):
return {
'title': 'zfs',
'collapse': False,
'editable': False,
'height': '250px',
'panels': [
{
'aliasColors': {},
'bars': False,
'dashLength': 10,
'dashes': False,
'datasource': None,
'fieldConfig': {
'defaults': {
'displayName': '${__field.name}'
},
'overrides': []
},
'fill': 1,
'fillGradient': 0,
'hiddenSeries': False,
'id': next(panel_id),
'legend': {
'alignAsTable': False,
'avg': False,
'current': False,
'max': False,
'min': False,
'rightSide': False,
'show': True,
'total': False,
'values': False
},
'lines': True,
'linewidth': 1,
'NonePointMode': 'None',
'options': {
'alertThreshold': True
},
'percentage': False,
'pluginVersion': '7.5.5',
'pointradius': 2,
'points': False,
'renderer': 'flot',
'seriesOverrides': [],
'spaceLength': 10,
'span': 4,
'stack': False,
'steppedLine': False,
'targets': [
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs" and
r["_field"] == "arcstats_c" and
r["host"] == "{node.name}"
)
|> map(fn: (r) => ({{
r with
_field: "target"
}})
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> yield(name: "target")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs" and
r["_field"] == "arcstats_size" and
r["host"] == "{node.name}"
)
|> map(fn: (r) => ({{
r with
_field: "used"
}})
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> yield(name: "used")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
],
'thresholds': [],
'timeRegions': [],
'title': 'zfs arc usage',
'tooltip': {
'shared': True,
'sort': 0,
'value_type': 'individual'
},
'type': 'graph',
'xaxis': {
'buckets': None,
'mode': 'time',
'name': None,
'show': True,
'values': []
},
'yaxes': [
{
'format': 'bits',
'label': None,
'logBase': 1,
'max': None,
'min': 0,
'show': True,
},
{
'format': 'short',
'label': None,
'logBase': 1,
'max': None,
'min': None,
'show': False,
}
],
'yaxis': {
'align': False,
'alignLevel': None
}
},
{
'aliasColors': {},
'bars': False,
'dashLength': 10,
'dashes': False,
'datasource': None,
'fieldConfig': {
'defaults': {
'displayName': '${__field.name}'
},
'overrides': []
},
'fill': 1,
'fillGradient': 0,
'hiddenSeries': False,
'id': next(panel_id),
'legend': {
'alignAsTable': False,
'avg': False,
'current': False,
'max': False,
'min': False,
'rightSide': False,
'show': True,
'total': False,
'values': False
},
'lines': True,
'linewidth': 1,
'NonePointMode': 'None',
'options': {
'alertThreshold': True
},
'percentage': False,
'pluginVersion': '7.5.5',
'pointradius': 2,
'points': False,
'renderer': 'flot',
'seriesOverrides': [],
'spaceLength': 10,
'span': 4,
'stack': False,
'steppedLine': False,
'targets': [
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs" and
r["_field"] == "arcstats_l2_size" and
r["host"] == "{node.name}"
)
|> map(fn: (r) => ({{
r with
_field: "used"
}})
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> yield(name: "used")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
],
'thresholds': [],
'timeRegions': [],
'title': 'zfs l2arc usage',
'tooltip': {
'shared': True,
'sort': 0,
'value_type': 'individual'
},
'type': 'graph',
'xaxis': {
'buckets': None,
'mode': 'time',
'name': None,
'show': True,
'values': []
},
'yaxes': [
{
'format': 'bits',
'label': None,
'logBase': 1,
'max': None,
'min': 0,
'show': True,
'decimals': 0,
},
{
'format': 'short',
'label': None,
'logBase': 1,
'max': None,
'min': None,
'show': False,
}
],
'yaxis': {
'align': False,
'alignLevel': None
}
},
{
'aliasColors': {},
'bars': False,
'dashLength': 10,
'dashes': False,
'datasource': None,
'fieldConfig': {
'defaults': {
'displayName': '${__field.name}'
},
'overrides': []
},
'fill': 1,
'fillGradient': 0,
'hiddenSeries': False,
'id': next(panel_id),
'legend': {
'alignAsTable': False,
'avg': False,
'current': False,
'max': False,
'min': False,
'rightSide': False,
'show': True,
'total': False,
'values': False
},
'lines': True,
'linewidth': 1,
'NonePointMode': 'None',
'options': {
'alertThreshold': True
},
'percentage': False,
'pluginVersion': '7.5.5',
'pointradius': 2,
'points': False,
'renderer': 'flot',
'seriesOverrides': [],
'spaceLength': 10,
'span': 4,
'stack': False,
'steppedLine': False,
'targets': [
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs" and
r["_field"] == "arcstats_hits" and
r["host"] == "{node.name}"
)
|> map(fn: (r) => ({{
r with
_field: "hits"
}})
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> derivative(unit: 1s, nonNegative: true)
|> yield(name: "misses")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs" and
r["_field"] == "arcstats_misses" and
r["host"] == "{node.name}"
)
|> map(fn: (r) => ({{
r with
_field: "misses"
}})
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> derivative(unit: 1s, nonNegative: true)
|> yield(name: "misses")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs" and
r["_field"] == "arcstats_l2_hits" and
r["host"] == "{node.name}"
)
|> map(fn: (r) => ({{
r with
_field: "l2hits"
}})
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> derivative(unit: 1s, nonNegative: true)
|> yield(name: "misses")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs" and
r["_field"] == "arcstats_l2_misses" and
r["host"] == "{node.name}"
)
|> map(fn: (r) => ({{
r with
_field: "l2misses"
}})
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> derivative(unit: 1s, nonNegative: true)
|> yield(name: "misses")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
],
'thresholds': [],
'timeRegions': [],
'title': 'zfs arc hits/misses',
'tooltip': {
'shared': True,
'sort': 0,
'value_type': 'individual'
},
'type': 'graph',
'xaxis': {
'buckets': None,
'mode': 'time',
'name': None,
'show': True,
'values': []
},
'yaxes': [
{
'format': 'short',
'label': None,
'logBase': 1,
'max': None,
'min': 0,
'show': True,
'decimals': 0,
},
{
'format': 'short',
'label': None,
'logBase': 1,
'max': None,
'min': None,
'show': False,
}
],
'yaxis': {
'align': False,
'alignLevel': None
}
},
{
'aliasColors': {},
'bars': False,
'dashLength': 10,
'dashes': False,
'datasource': None,
'fieldConfig': {
'defaults': {
'displayName': '${__field.labels.dataset} ${__field.name}'
},
'overrides': []
},
'fill': 1,
'fillGradient': 0,
'hiddenSeries': False,
'id': next(panel_id),
'legend': {
'alignAsTable': False,
'avg': False,
'current': False,
'max': False,
'min': False,
'rightSide': False,
'show': True,
'total': False,
'values': False
},
'lines': True,
'linewidth': 1,
'NonePointMode': 'None',
'options': {
'alertThreshold': True
},
'percentage': False,
'pluginVersion': '7.5.5',
'pointradius': 2,
'points': False,
'renderer': 'flot',
'seriesOverrides': [],
'spaceLength': 10,
'span': 6,
'stack': True,
'steppedLine': False,
'targets': [
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs_dataset" and
r["_field"] == "used" and
r["host"] == "{node.name}"
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> yield(name: "used")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs_dataset" and
r["_field"] == "usedsnap" and
r["host"] == "{node.name}"
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> yield(name: "out")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
],
'thresholds': [],
'timeRegions': [],
'title': 'zfs usage per dataset',
'tooltip': {
'shared': True,
'sort': 0,
'value_type': 'individual'
},
'type': 'graph',
'xaxis': {
'buckets': None,
'mode': 'time',
'name': None,
'show': True,
'values': []
},
'yaxes': [
{
'format': 'bits',
'label': None,
'logBase': 1,
'max': None,
'min': 0,
'show': True,
},
{
'format': 'short',
'label': None,
'logBase': 1,
'max': None,
'min': None,
'show': False,
}
],
'yaxis': {
'align': False,
'alignLevel': None
}
},
{
'aliasColors': {},
'bars': False,
'dashLength': 10,
'dashes': False,
'datasource': None,
'fieldConfig': {
'defaults': {
'displayName': '${__field.labels.pool} ${__field.name}'
},
'overrides': []
},
'fill': 1,
'fillGradient': 0,
'hiddenSeries': False,
'id': next(panel_id),
'legend': {
'alignAsTable': False,
'avg': False,
'current': False,
'max': False,
'min': False,
'rightSide': False,
'show': True,
'total': False,
'values': False
},
'lines': True,
'linewidth': 1,
'NonePointMode': 'None',
'options': {
'alertThreshold': True
},
'percentage': False,
'pluginVersion': '7.5.5',
'pointradius': 2,
'points': False,
'renderer': 'flot',
'seriesOverrides': [],
'spaceLength': 10,
'span': 6,
'stack': False,
'steppedLine': False,
'targets': [
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs_pool" and
r["_field"] == "free" and
r["host"] == "{node.name}"
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> yield(name: "in")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
{
'groupBy': [
{'type': 'time', 'params': ['$__interval']},
{'type': 'fill', 'params': ['linear']},
],
'orderByTime': "ASC",
'policy': "default",
'query': f"""from(bucket: "telegraf")
|> range(start: v.timeRangeStart, stop: v.timeRangeStop)
|> filter(fn: (r) =>
r["_measurement"] == "zfs_pool" and
r["_field"] == "size" and
r["host"] == "{node.name}"
)
|> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)
|> yield(name: "out")""",
'resultFormat': 'time_series',
'select': [[
{'type': 'field', 'params': ['value']},
{'type': 'mean', 'params': []},
]],
"tags": []
},
],
'thresholds': [],
'timeRegions': [],
'title': 'zfs usage per pool',
'tooltip': {
'shared': True,
'sort': 0,
'value_type': 'individual'
},
'type': 'graph',
'xaxis': {
'buckets': None,
'mode': 'time',
'name': None,
'show': True,
'values': []
},
'yaxes': [
{
'format': 'bits',
'label': None,
'logBase': 1,
'max': None,
'min': 0,
'show': True,
'decimals': 0,
},
{
'format': 'short',
'label': None,
'logBase': 1,
'max': None,
'min': None,
'show': False,
}
],
'yaxis': {
'align': False,
'alignLevel': None
}
},
],
}

View file

@ -124,6 +124,10 @@ for rnode in repo.nodes:
dashboard['rows'].append(dashboard_row_wireguard(panel_id, rnode))
dashboard['tags'].add('wireguard')
if rnode.has_bundle('zfs'):
dashboard['rows'].append(dashboard_row_zfs(panel_id, rnode))
dashboard['tags'].add('zfs')
files[f'/var/lib/grafana/dashboards/{rnode.name}.json'] = {
# use metadata_to_json, because this supports sets
'content': metadata_to_json(dashboard),

View file

@ -0,0 +1,51 @@
#!/usr/bin/env python3
from subprocess import check_output
pools = check_output(
['/usr/sbin/zpool', 'list', '-Hpo', 'name,free,size'],
env={
'LC_ALL': 'C',
},
).decode('UTF-8')
datasets = check_output(
['/usr/sbin/zfs', 'list', '-Hpo', 'name,usedbydataset,usedsnap,compressratio'],
env={
'LC_ALL': 'C',
},
).decode('UTF-8')
zpools = {}
for line in pools.splitlines():
name, free, total = line.split()
zpools[name] = {
'free': free,
'total': total,
}
print('zfs_pool,pool={} size={}i,free={}i'.format(name, total, free))
for line in datasets.splitlines():
name, used, usedsnap, compressratio = line.split()
pool = name.split('/')[0]
if '/' not in name:
# covered by pool metrics above
continue
if pool not in zpools:
raise Exception('BUG: {} in datasets, but {} not in pools'.format(name, pool))
if compressratio[-1] == 'x':
compressratio = compressratio[:-1]
print('zfs_dataset,pool={},dataset={} used={}i,usedsnap={}i,compressratio={}'.format(
pool,
name,
used,
usedsnap,
compressratio,
))

View file

@ -44,6 +44,9 @@ files = {
'svc_systemd:zfs-zed:restart'
},
},
'/usr/local/sbin/telegraf-per-dataset': {
'mode': '0755',
},
'/usr/local/sbin/zfs-auto-snapshot': {
'mode': '0755',
},

View file

@ -84,9 +84,18 @@ if node.has_bundle('telegraf'):
'builtin': {
'zfs': [{
'poolMetrics': True,
'datasetMetrics': True,
}],
},
'exec': {
'zfs-dataset': {
'commands': ['sudo /usr/local/sbin/telegraf-per-dataset'],
'data_format': 'influx',
'timeout': '5s',
},
},
},
'sudo_commands': {
'/usr/local/sbin/telegraf-per-dataset',
},
}