This repository has been archived by the owner on Jun 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathget_yarn_logs.sh
executable file
·95 lines (77 loc) · 2.37 KB
/
get_yarn_logs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/bash
# Copyright (c) 2015 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Usage: get_yarn_logs.sh <resource-manager-url> [spark-job-name]
#
#   resource-manager-url  base URL of the YARN ResourceManager web UI; the
#                         application list is read from <url>/cluster/apps
#   spark-job-name        grep pattern used to filter the application list;
#                         when omitted the empty pattern matches every line
#
# Extra curl options (for example a SOCKS proxy) can be placed in SOCKS.
SOCKS="" # Example: --socks5-hostname 127.0.0.1:6888
# Drop one trailing slash so the paths appended below do not contain "//".
URL=${1%/}
SPARK_JOB_NAME=${2}

if [[ -z "$URL" ]]; then
  echo "Usage: ${0##*/} <resource-manager-url> [spark-job-name]" >&2
  exit 2
fi

# SOCKS holds zero or more curl options and has to be word-split, so it is
# left unquoted on purpose.
# --fail/--show-error turn HTTP and transport errors into a visible failure
# instead of an empty application list.
# shellcheck disable=SC2086
APPS_PAGE=$(curl --silent --show-error --fail $SOCKS "$URL/cluster/apps") || {
  echo "Failed to fetch $URL/cluster/apps" >&2
  exit 1
}

# Keep only the lines that mention the job name and an application id.
# -e protects against a pattern that starts with a dash.
APPS=$(printf '%s\n' "$APPS_PAGE" | grep -e "${SPARK_JOB_NAME}" | grep application_)
#Parsing application log url address
#sth like: cdh-master-...:8088/cluster/app/application_1443531803307_0564/

#######################################
# Pick the newest application from a filtered YARN application listing.
#
# The listing is split on commas; every field that contains both "/proxy/"
# and ">History" contributes the text between those two markers (with single
# quotes removed) as an application path. Candidates are ordered by the two
# numbers in "application_<n1>_<n2>", compared as a pair; a candidate whose
# id cannot be parsed is only chosen when nothing better exists.
#
# Arguments: $1 - base URL that is prefixed to "/cluster/app/<path>"
# Inputs:    application listing on stdin
# Outputs:   the chosen URL on stdout, or nothing when no field matched
# Returns:   1 when no Python interpreter is available
#######################################
latest_app_url() {
  local base_url=$1
  local py
  py=$(command -v python || command -v python3) || {
    echo "latest_app_url: python (or python3) is required" >&2
    return 1
  }
  # The base URL is handed over as an argument rather than pasted into the
  # Python source, so quotes or other special characters in it cannot alter
  # the program. The code runs unchanged on Python 2 and Python 3.
  "$py" -c '
import re
import sys

base_url = sys.argv[1]
best_key = None
best_url = None
for field in sys.stdin.read().split(","):
    if "/proxy/" not in field or ">History" not in field:
        continue
    # Text between "/proxy/" and ">History"; \x27 is the single quote that
    # closes the href attribute.
    app = field.split("/proxy/")[1].split(">History")[0].replace("\x27", "")
    found = re.search(r"application_(\d+)_(\d+)", app)
    key = (int(found.group(1)), int(found.group(2))) if found else (-1, -1)
    # Strict comparison keeps the first candidate among equals.
    if best_key is None or key > best_key:
        best_key = key
        best_url = base_url + "/cluster/app/" + app
if best_url is not None:
    print(best_url)
' "$base_url"
}

IDS=$(printf '%s\n' "$APPS" | latest_app_url "$URL")
echo "IDS: $IDS"
# Follow the links from the application page to the container stdout log and
# print the log:
#   application page -> "logslink" anchor -> page with a meta refresh
#   -> <refresh target>/stdout/?start=0 -> contents of the <pre> element.
# The script stops with status 0 after the first application whose log could
# be printed, and with status 1 when none could.

# The page parsing is done in Python; use whichever interpreter is installed.
PYTHON=$(command -v python || command -v python3) || {
  echo "python (or python3) is required to parse the YARN pages" >&2
  exit 1
}

#######################################
# Print the part of stdin that lies between two marker strings.
# Arguments: $1 - start marker
#            $2 - end marker
#            $3 - "collapse" to squeeze every whitespace run (newlines
#                 included) into one space before searching; any other
#                 value, or none, leaves the input untouched
# Outputs:   the extracted text followed by a newline
# Returns:   1 when the start marker does not occur in the input
#######################################
text_between() {
  "$PYTHON" -c '
import sys

start, end, mode = sys.argv[1], sys.argv[2], sys.argv[3]
body = sys.stdin.read()
if mode == "collapse":
    body = " ".join(body.split())
if start not in body:
    sys.exit(1)
sys.stdout.write(body.split(start)[1].split(end)[0] + "\n")
' "$1" "$2" "${3:-keep}"
}

# An empty list, or the literal word None, means that no application was
# selected; there is nothing to download in that case.
if [[ -z "$IDS" || "$IDS" == "None" ]]; then
  echo "No matching YARN application found" >&2
  exit 1
fi

# Split the list on whitespace without exposing it to pathname expansion.
read -r -d '' -a APP_URLS <<< "$IDS"

for i in "${APP_URLS[@]}"; do
  # SOCKS holds zero or more curl options and has to be word-split, so it is
  # left unquoted on purpose in the curl calls below.
  # shellcheck disable=SC2086
  LOGLINK=$(curl --silent $SOCKS "$i" | grep logslink)
  echo "----> $LOGLINK"
  if ! LOGURL=$(printf '%s\n' "$LOGLINK" \
    | text_between 'href="//' '">logs</a>' collapse); then
    echo "No logs link found on $i" >&2
    continue
  fi
  echo "LOGURL: $LOGURL"

  # shellcheck disable=SC2086
  PAGE=$(curl --silent $SOCKS "http://$LOGURL")
  # Whitespace is collapsed first so the marker is found even if the page
  # puts line breaks or several blanks between the attributes of the tag.
  if ! DIRECTLOGURL=$(printf '%s\n' "$PAGE" \
    | text_between '<meta http-equiv="refresh" content="1; url=' '">' collapse); then
    echo "No refresh target found on http://$LOGURL" >&2
    continue
  fi
  DIRECTLOGURL="$DIRECTLOGURL/stdout/?start=0"
  echo "DIRECTLOGURL-----> $DIRECTLOGURL"

  # The log text itself is printed with its whitespace intact.
  # shellcheck disable=SC2086
  if ! curl --silent $SOCKS "$DIRECTLOGURL" | text_between '<pre>' '</pre>'; then
    echo "No <pre> section found on $DIRECTLOGURL" >&2
    continue
  fi
  echo "====== LOG OK, ID: $i ========"
  exit 0
done

echo "Could not retrieve the stdout log for: $IDS" >&2
exit 1