...
基本思路,通过newlisp定时下载jobtracker页面,用正则表达式解析html中的table元素,然后获得最新的mapreduce的状态。 每次获得状态数据后,存入mysql数据库,然后用tableau将mapreduce集群状态用报表呈现。 这是jobtracker站点的数据 这是Tableau绘制的报表
基本思路,通过newlisp定时下载jobtracker页面,用正则表达式解析html中的table元素,然后获得最新的mapreduce的状态。
每次获得状态数据后,存入mysql数据库,然后用tableau将mapreduce集群状态用报表呈现。
这是jobtracker站点的数据
这是Tableau绘制的报表
这样就可以用数据可视化的方式展示Hadoop集群计算的压力状态。
下面是newlisp代码,主要就是用正则表达式解析html,用mysql模块写入数据库。
#!/usr/bin/newlisp
(load "mysql.lsp")
;; check-args: prints the command-line argument list and stores its length
;; in the global symbol args-length.
;; NOTE(review): the text after the args-length assignment is unbalanced and
;; looks like the tail of a different function; part of this span appears to
;; have been lost when the listing was extracted. The functions get-number,
;; extract-summary-table, parse-args and access-job-tracker-site are called
;; elsewhere in this file but no complete definition of them is visible;
;; presumably they lived here. TODO: restore from the original source.
(define (check-args)
(print "args: ")
(println (main-args))
(set 'args-length (length (main-args)))
(if ((.*)" td) 3))
(if (find "(.*)" r) 3)
r))
(define (remove-td tds)
  ;; Apply get-number to every element of tds.
  ;;
  ;; tds: list of matched cell strings (the caller passes the output of
  ;;      find-all).
  ;; Returns a new list holding the get-number result for each element,
  ;; in the same order; an empty list yields an empty list.
  ;;
  ;; map builds the result directly, so no global accumulator symbol is
  ;; created or overwritten as a side effect of calling this function.
  (map get-number tds))
(define (parse-summary-table table)
  ;; Pull the cell matches out of the summary table text and convert them.
  ;;
  ;; table: text of the summary table to search.
  ;; Side effects: assigns the global symbols all-tds (raw matches) and
  ;;   all-summary-values (matches after remove-td); the latter is read by
  ;;   the top-level script.
  ;; Returns the value assigned to all-summary-values.
  ;; NOTE(review): the pattern literal looks garbled (markup appears to have
  ;; been stripped from it) -- TODO confirm against the original source.
  (setq all-tds (find-all "[\\s\\S]*? | " table))
  (setq all-summary-values (remove-td all-tds)))
;; extract-tables: locate the summary table inside html-content and parse it.
;; html-content: text searched with find-all for table matches.
;; Side effects: assigns the global symbols all-tables and summary-table.
;; Returns whatever parse-summary-table returns.
;; NOTE(review): the pattern literal below spans a line break and looks
;; garbled (markup appears to have been stripped from it) -- TODO confirm
;; against the original source. Do not re-indent the continuation line:
;; leading spaces there would become part of the pattern.
(define (extract-tables html-content)
(set 'all-tables (find-all "
" html-content))
;; Each table accepted by extract-summary-table overwrites summary-table,
;; so the last accepted table is the one that gets parsed.
(dolist (table all-tables)
(if (extract-summary-table table)
(set 'summary-table table))
)
;; summary-table keeps its previous value when no table was accepted.
(parse-summary-table summary-table)
)
(define (write-summary-to-mysql all-summary-values)
  ;; Insert one row of JobTracker summary metrics into MySQL.
  ;;
  ;; all-summary-values: list whose first 14 elements fill, in order, the
  ;;   columns listed in the insert statement after collect_time.
  ;; Returns the result of the final :query call.
  ;; Throws an error, before any connection is opened, when fewer than 14
  ;; values are supplied.
  ;;
  ;; NOTE(review): host, user and password are hard-coded below; consider
  ;; moving them to configuration.
  ;; NOTE(review): the values are spliced into the SQL text with format; they
  ;; come from a scraped page, so confirm get-number only lets numbers through.
  ;; NOTE(review): the column name exclueded_nodes is kept exactly as written
  ;; because it has to match the existing table schema.
  (println all-summary-values)
  ;; Validate up front so a short or missing list fails with a clear message
  ;; instead of an index error after the connection has been made.
  (unless (and (list? all-summary-values)
               (>= (length all-summary-values) 14))
    (throw-error (string "write-summary-to-mysql needs at least 14 summary values, got: "
                         all-summary-values)))
  (set 'mysql-instance (Mysql))
  (println "mysql-instance: " mysql-instance)
  (set 'mysql-host "10.100.10.10")
  (set 'mysql-port 3306)
  (set 'mysql-user "user")
  (set 'mysql-pwd "123456")
  (set 'mysql-db "bigdata_data_market")
  (set 'job-tracker-summary-table "hadoop_job_tracker_summary")
  (:connect mysql-instance mysql-host mysql-user mysql-pwd mysql-db mysql-port)
  (:query mysql-instance "SET character_set_client = utf8;")
  ;; format accepts its data as a single list: table name first, then the
  ;; first 14 summary values.
  (set 'insert-summary-sql
       (format "insert into %s (collect_time,running_map_tasks,running_reduce_tasks,total_submissions,nodes,occupied_map_slots,occupied_reduce_slots,reserved_map_slots,reserved_reduce_slots,map_task_capacity,reduce_task_capacity,average_tasks_per_node,blacked_listed_nodes,gray_listed_nodes,exclueded_nodes) values (now(),%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
               (cons job-tracker-summary-table
                     (slice all-summary-values 0 14))))
  (println insert-summary-sql)
  (:query mysql-instance insert-summary-sql))
;; main logic starts now
;; The calls below run in order. parse-args and access-job-tracker-site have
;; no complete definition in the visible part of this file; presumably
;; access-job-tracker-site downloads the page and triggers extract-tables
;; -- TODO confirm.
(check-args)
(parse-args)
(access-job-tracker-site)
;; all-summary-values is the global symbol assigned in parse-summary-table,
;; so it must have been populated by the preceding steps.
(write-summary-to-mysql all-summary-values)
(exit)