本文主要包括:
- 根据yarn的applicationid获取应用实时资源消耗的方法
- 写python脚本调取yarn api获取实时资源消耗情况
根据yarn的applicationid获取应用实时资源消耗的方法
这种方法是根据hadoop的配置文件,使用YarnClient来获取applicationId的资源占用情况
/**
 * Queries the live resource usage of a YARN application via {@link YarnClient},
 * configured from local Hadoop config files (hdfs-site/core-site/yarn-site.xml),
 * and prints the application's start time and current vcore/memory consumption.
 */
public class YarnListener {
private static Logger logger = LoggerFactory.getLogger(YarnListener.class);
public static void main(String[] args) {
// Resolve the bundled Hadoop configuration directory from the classpath.
URL resource = HDFSUtil.class.getClassLoader().getResource("config/hadoop-conf");
String resourceDir = resource.getPath().concat(Constants.SEPARATOR);
Configuration conf = new YarnConfiguration();
conf.addResource(new Path(resourceDir.concat("hdfs-site.xml")));
conf.addResource(new Path(resourceDir.concat("core-site.xml")));
conf.addResource(new Path(resourceDir.concat("yarn-site.xml")));
YarnClient yarnClient = YarnClient.createYarnClient();
yarnClient.init(conf);
yarnClient.start();
// ApplicationId = (cluster timestamp, sequence number).
// BUG FIX: the original used the literal 0001, which Java parses as an OCTAL
// literal — harmless for 1, but e.g. 0010 would silently mean 8. Use decimal.
ApplicationId applicationId = ApplicationId.newInstance(1577686647484L, 1);
try {
ApplicationReport report = yarnClient.getApplicationReport(applicationId);
System.out.println(report.getStartTime());
System.out.println(report.getApplicationResourceUsageReport().getUsedResources().getVirtualCores());
System.out.println(report.getApplicationResourceUsageReport().getUsedResources().getMemorySize());
// (removed a duplicated getStartTime() print that was a copy-paste artifact)
System.out.println(report.getApplicationResourceUsageReport().toString());
} catch (YarnException | IOException e) {
// Use the class logger instead of printStackTrace so failures reach the log pipeline.
logger.error("failed to fetch application report for {}", applicationId, e);
} finally {
// BUG FIX: stop() used to sit after the try/catch, so an unexpected runtime
// exception would leak the client; finally guarantees shutdown.
yarnClient.stop();
}
}
}
输出结果如下:
num_used_containers: -1
num_reserved_containers: -1
used_resources {
memory: -1
virtual_cores: -1
resource_value_map {
key: "memory-mb"
value: -1
units: "Mi"
type: COUNTABLE
}
resource_value_map {
key: "vcores"
value: -1
units: ""
type: COUNTABLE
}
}
reserved_resources {
memory: -1
virtual_cores: -1
resource_value_map {
key: "memory-mb"
value: -1
units: "Mi"
type: COUNTABLE
}
resource_value_map {
key: "vcores"
value: -1
units: ""
type: COUNTABLE
}
}
needed_resources {
memory: -1
virtual_cores: -1
resource_value_map {
key: "memory-mb"
value: -1
units: "Mi"
type: COUNTABLE
}
resource_value_map {
key: "vcores"
value: -1
units: ""
type: COUNTABLE
}
}
memory_seconds: 5549488
vcore_seconds: 5378
preempted_memory_seconds: 5549488
preempted_vcore_seconds: 5378
application_resource_usage_map {
key: "memory-mb"
value: 5549488
}
application_resource_usage_map {
key: "vcores"
value: 5378
}
application_preempted_resource_usage_map {
key: "memory-mb"
value: 5549488
}
application_preempted_resource_usage_map {
key: "vcores"
value: 5378
}
扩展
这种方法,感觉可以和hive的hook结合起来使用,在hivesql执行过程中,就把资源使用情况保存下来
这里的资源占用都是实时的占用,可以取一个平均资源占用来当作这个任务的资源占用情况
具体可以参考根据yarn的applicationid获取应用实时资源消耗的方法
写python脚本调取yarn api获取实时资源消耗情况
这种方式,就是用爬虫抓取yarn的监控页面,取页面上的内容。其实不是很好,因为resourcemanager在ha切换之后页面地址会变,但是很方便
# Scrape cluster-level metrics from the YARN ResourceManager web UI
# (http://<rm>:<port>/cluster) and persist one snapshot row into MySQL.
import requests
import json
import time  # de-duplicated: was imported twice in the original
import pymysql
from bs4 import BeautifulSoup
import os, threading

url_full = "http://ip:port/cluster"
conn = pymysql.connect(host='114.67.103.201', user="root", passwd="admin@123456", db="myuse")
cur = conn.cursor()  # cursor used for the INSERT below

response = requests.request("GET", url_full)
print(url_full)
soup = BeautifulSoup(response.text, 'html.parser')  # lxml would also work


def _cell(table_id, col, suffix=''):
    """Return the stripped text of column *col* in the overview table *table_id*."""
    selector = '#%s > tbody > tr > td:nth-of-type(%d)%s' % (table_id, col, suffix)
    return soup.select(selector)[0].get_text().strip()


# Submitted apps
apps_submit = _cell('metricsoverview', 1)
print(apps_submit)
# Pending apps
apps_pending = _cell('metricsoverview', 2)
print(apps_pending)
# Running apps
apps_running = _cell('metricsoverview', 3)
print(apps_running)
# Completed apps
apps_completed = _cell('metricsoverview', 4)
print(apps_completed)
# Running containers
container_running = _cell('metricsoverview', 5)
print(container_running)
# Used memory
memory_used = _cell('metricsoverview', 6)
print(memory_used)
# Total memory
memory_total = _cell('metricsoverview', 7)
print(memory_total)
# Used vcores
core_used = _cell('metricsoverview', 9)
print(core_used)
# Total vcores
core_total = _cell('metricsoverview', 10)
print(core_total)
# Total number of YARN nodes
node_total = _cell('nodemetricsoverview', 1, ' > a')
print(node_total)
# YARN scheduler type
yarn_scheduler_type = _cell('schedulermetricsoverview', 1)
print(yarn_scheduler_type)

# BUG FIX: the original statement listed 8 columns but 11 %s placeholders,
# while execute() was given an 8-tuple — the INSERT could never succeed.
# Placeholder count now matches the column list and the parameter tuple.
sql = "insert into yarn_monitor(apps_submit,apps_pending,apps_running,memory_used,memory_total,core_used,node_total,yarn_scheduler_type)" \
      " values(%s,%s,%s,%s,%s,%s,%s,%s)"
try:
    cur.execute(sql, (apps_submit, apps_pending, apps_running, memory_used,
                      memory_total, core_used, node_total, yarn_scheduler_type))
    conn.commit()  # single commit (the original committed twice)
finally:
    # Always release DB resources, even if the INSERT fails.
    cur.close()
    conn.close()
根据appid获取yarn的执行日志
当日志量不大的情况下,直接使用如下命令查看:
yarn logs -applicationId application_1641525781336_0011
但是,当日志量很大的情况下,直接执行上述命令,会报如下错误:
Exception in thread "main" java.lang.RuntimeException: The total log size is too large.The log size limit is 10MB. Please specify a proper value --size option or if you really want to fetch all, please specify -1 for --size_limit_mb option.
at org.apache.hadoop.yarn.client.cli.LogsCLI.getMatchedLogFiles(LogsCLI.java:1169)
at org.apache.hadoop.yarn.client.cli.LogsCLI.getMatchedContainerLogFiles(LogsCLI.java:1348)
at org.apache.hadoop.yarn.client.cli.LogsCLI.getMatchedOptionForRunningApp(LogsCLI.java:1517)
at org.apache.hadoop.yarn.client.cli.LogsCLI.getMatchedLogTypesForRunningApp(LogsCLI.java:1537)
at org.apache.hadoop.yarn.client.cli.LogsCLI.fetchApplicationLogs(LogsCLI.java:1090)
at org.apache.hadoop.yarn.client.cli.LogsCLI.runCommand(LogsCLI.java:372)
at org.apache.hadoop.yarn.client.cli.LogsCLI.run(LogsCLI.java:153)
这时候可以只取一部分日志看看里面都是什么,一般情况下,都是因为代码里有println内容了:
# -size 参数用于指定要获取的日志字节数
yarn logs -applicationId application_1641525781336_0011 -size 1000000000 > appid.log