欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页  >  IT编程

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

程序员文章站 2022-03-24 12:47:29
ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目 思路同我之前的博客的思路 https://www.cnblogs.com/symkmk123/p/10197467.html 但是数据是从web访问的数据 avro第一次过滤 观察数据的格式,我们主要分析第四个 ......

etl项目2:大数据清洗,处理:使用mapreduce进行离线数据分析并报表显示完整项目

思路同我之前的博客的思路 

但是数据是从web访问的数据

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

avro第一次过滤

观察数据的格式,我们主要分析第四个字段的数据.发现有.css , .jpg .png等等等无效的数据.

通过观察数据发现有效数据都不带 . , 所以第一次过滤写入avro总表里的数据一次过滤后的有效数据,不包含 .css , .jpg , .png 这样的数据

同时count持久化到mysql

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

orc1:海牛的topics 最受欢迎的top10

通过观察发现这个需求的有效url是 /topics/数字的 所以在第一次过滤的数据的基础上的正则就是

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

这种保留下来的也只是/topics/数字这种格式,方便用 hql统计结果

 上代码

//text2avro
package mrrun.hainiuetl;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import mrrun.base.BaseMR;

/**
 * ETL step 1: parse raw \001-separated web-access log lines (10 fields per
 * line) and write the valid records into the Avro "total" table.
 *
 * A record is dropped (and counted under "etl_err") when:
 *  - the line does not split into exactly 10 fields,
 *  - the request timestamp cannot be parsed, or
 *  - the requested path contains a '.' (static assets: .css, .jpg, .png ...).
 * Good records are counted under "etl_good" so the driver can persist the
 * good/bad totals to MySQL.
 */
public class Text2Avro extends BaseMR
{
    public static Schema schema = null;

    public static Schema.Parser parse = new Schema.Parser();

    /** Map-only job: one Avro record out per valid input line. */
    public static class Text2AvroMapper extends Mapper<LongWritable, Text, AvroKey<GenericRecord>, NullWritable>
    {
        @Override
        protected void setup(Mapper<LongWritable, Text, AvroKey<GenericRecord>, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // Build the Schema object from the /hainiu.avro schema file on the classpath.
            schema = parse.parse(Text2Avro.class.getResourceAsStream("/hainiu.avro"));
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();

            // Raw log fields are separated by the \u0001 control character.
            String[] splits = line.split("\001");
            if (splits == null || splits.length != 10) {
                context.getCounter("etl_err", "bad line num").increment(1L);
                return;
            }

            String uip1 = splits[0];
            // NOTE(review): project helper that normalizes the client IP —
            // confirm exact class/method casing (same package, no import in original).
            String uip = IPUtil.getIp(uip1);

            // Field 2: request time in Apache log format, e.g. 18/Sep/2013:06:49:18.
            String datetime = splits[2];
            SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);
            Date myDate = null;
            try
            {
                myDate = sdf.parse(datetime);
            } catch (ParseException e)
            {
                // Original fell through and NPE'd in format(); drop the record instead.
                context.getCounter("etl_err", "bad datetime num").increment(1L);
                return;
            }

            // Re-emit the timestamp as a sortable compact form.
            SimpleDateFormat sdf2 = new SimpleDateFormat("yyyyMMddHHmmss");
            String format = sdf2.format(myDate);

            // Field 3: request line, e.g. "GET /categories/8?filter=recent&page=12 HTTP/1.1".
            String url = splits[3];
            String method = "";
            String top = "";
            String http = "";
            if (url != null)
            {
                String[] s = url.split(" ");
                if (s.length == 3)
                {
                    method = s[0];
                    http = s[2];

                    String top1 = s[1];
                    // Valid resource paths carry no '.'; anything with one is a
                    // static asset (.css/.jpg/.png ...) and is filtered out here.
                    if (top1.contains("."))
                    {
                        context.getCounter("etl_err", "no line num").increment(1L);
                        return;
                    }
                    top = top1;
                }
            }

            String status1 = splits[4];
            String status2 = splits[5];
            String post = splits[6];
            String from = splits[7];
            String usagent = splits[8];

            // Build one output row matching the loaded Avro schema.
            GenericRecord record = new GenericData.Record(Text2Avro.schema);
            record.put("uip", uip);
            record.put("datetime", format);
            record.put("method", method);
            record.put("http", http);
            record.put("top", top);
            record.put("from", from);
            record.put("status1", status1);
            record.put("status2", status2);
            record.put("post", post);
            record.put("usagent", usagent);

            context.getCounter("etl_good", "good line num").increment(1L);

            context.write(new AvroKey<GenericRecord>(record), NullWritable.get());
        }
    }

    /**
     * Configures the map-only job that writes Avro output.
     *
     * @param conf Hadoop configuration supplied by the driver
     * @return the fully configured, not-yet-submitted Job
     */
    @Override
    public Job getJob(Configuration conf) throws IOException {
//        // enable reduce output compression
//        conf.set(FileOutputFormat.COMPRESS, "true");
//        // set reduce output compression codec
//        conf.set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());

        Job job = Job.getInstance(conf, getJobNameWithTaskId());

        job.setJarByClass(Text2Avro.class);

        job.setMapperClass(Text2AvroMapper.class);

        job.setMapOutputKeyClass(AvroKey.class);
        job.setMapOutputValueClass(NullWritable.class);

        // map-only job: no reduce phase
        job.setNumReduceTasks(0);

        // emit Avro container files
        job.setOutputFormatClass(AvroKeyOutputFormat.class);

        // Build the Schema object from the /hainiu.avro schema file on the classpath.
        schema = parse.parse(Text2Avro.class.getResourceAsStream("/hainiu.avro"));

        // declare the Avro output key schema
        AvroJob.setOutputKeySchema(job, schema);

        FileInputFormat.addInputPath(job, getFirstJobInputPath());

        FileOutputFormat.setOutputPath(job, getJobOutputPath(getJobNameWithTaskId()));

        return job;
    }

    @Override
    public String getJobName() {
        return "etltext2avro";
    }

}
//avro2orc_topic10
package mrrun.hainiuetl;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import mrrun.base.BaseMR;
import mrrun.util.OrcFormat;
import mrrun.util.OrcUtil;

/**
 * ETL step 2 (topics top-10): read the filtered Avro total table and keep only
 * requests whose path matches /topics/&lt;digits&gt;, writing (uip, datetime, top)
 * rows to ORC so Hive can aggregate the top-10 topics.
 */
public class Avro2Orc_topic10 extends BaseMR {
    public static Schema schema = null;

    public static Schema.Parser parse = new Schema.Parser();

    /** Matches "/topics/<digits>"; compiled once instead of per record. */
    private static final Pattern TOPIC_PATTERN = Pattern.compile("/topics/\\d+");

    /** Map-only job: Avro record in, ORC row out; non-topic rows are dropped. */
    public static class Avro2OrcMapper extends Mapper<AvroKey<GenericRecord>, NullWritable, NullWritable, Writable> {
        OrcUtil orcUtil = new OrcUtil();

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // NOTE(review): project ORC schema constant — confirm exact field casing.
            orcUtil.setWriteOrcInspector(OrcFormat.etlOrcSchema_topic10);
        }

        @Override
        protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
                throws IOException, InterruptedException {
            // One Avro row from the total table.
            GenericRecord datum = key.datum();

            String uip = (String) datum.get("uip");
            String datetime = (String) datum.get("datetime");
            String top1 = (String) datum.get("top");

            // Keep only the normalized "/topics/<id>" portion of the path so the
            // Hive group-by sees one clean key per topic.
            Matcher matcher = TOPIC_PATTERN.matcher(top1);
            if (!matcher.find()) {
                context.getCounter("etl_err", "notopics line num").increment(1L);
                return;
            }
            String top = matcher.group();

            orcUtil.addAttr(uip, datetime, top);

            Writable w = orcUtil.serialize();
            context.getCounter("etl_good", "good line num").increment(1L);

            context.write(NullWritable.get(), w);
        }
    }

    /**
     * Configures the Avro-to-ORC map-only job.
     *
     * @param conf Hadoop configuration supplied by the driver
     * @return the fully configured, not-yet-submitted Job
     */
    @Override
    public Job getJob(Configuration conf) throws IOException {

        // Disable map speculative execution so one map handles one region's data.
        // (Original key "mapreduce.map.spedulative" was a typo and had no effect.)
        conf.set("mapreduce.map.speculative", "false");
        // snappy-compress the ORC files
        conf.set("orc.compress", CompressionKind.SNAPPY.name());
        // write ORC row-group indexes
        conf.set("orc.create.index", "true");

        Job job = Job.getInstance(conf, getJobNameWithTaskId());

        job.setJarByClass(Avro2Orc_topic10.class);

        job.setMapperClass(Avro2OrcMapper.class);

        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Writable.class);

        // map-only job: no reduce phase
        job.setNumReduceTasks(0);

        job.setInputFormatClass(AvroKeyInputFormat.class);

        // Build the Schema object from the /hainiu.avro schema file on the classpath.
        schema = parse.parse(Avro2Orc_topic10.class.getResourceAsStream("/hainiu.avro"));

        AvroJob.setInputKeySchema(job, schema);

        job.setOutputFormatClass(OrcNewOutputFormat.class);

        FileInputFormat.addInputPath(job, getFirstJobInputPath());

        FileOutputFormat.setOutputPath(job, getJobOutputPath(getJobNameWithTaskId()));
        return job;
    }

    @Override
    public String getJobName() {
        return "etlavro2orc_topic10";
    }

}
//text2avrojob
package mrrun.hainiuetl;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import mrrun.util.JobRunResult;
import mrrun.util.JobRunUtil;

/**
 * Driver for the text-to-Avro ETL job. Runs the MR job, reads its
 * etl_good/etl_err counters, and persists the day's good/bad/total counts and
 * bad-line rate to MySQL (accumulating into the existing row when the day is
 * already present).
 */
public class Text2AvroJob extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // Configuration populated by ToolRunner from the -D options.
        Configuration conf = getConf();

        // Job-chain container (single job here).
        JobControl jobc = new JobControl("etltext2avro");

        Text2Avro avro = new Text2Avro();

        // Conf only needs to be assigned once.
        avro.setConf(conf);

        ControlledJob avroCJob = avro.getControlledJob();

        // Run the MR job to completion so its counters are available below.
        Job job = avroCJob.getJob();
        job.waitForCompletion(true);

        JobRunResult result = JobRunUtil.run(jobc);
        result.setCounters("etl1", avroCJob.getJob().getCounters());

        result.print(true);

        Counters counterMap = result.getCounterMap("etl1");
        CounterGroup group1 = counterMap.getGroup("etl_good");
        CounterGroup group2 = counterMap.getGroup("etl_err");

        Counter good = group1.findCounter("good line num");
        Counter bad = group2.findCounter("bad line num");
        System.out.println("\t\t" + good.getDisplayName() + "  =  " + good.getValue());
        System.out.println("\t\t" + bad.getDisplayName() + "  =  " + bad.getValue());

        // Persist today's counts to MySQL, keyed by yyyyMMdd.
        Date date = new Date();
        SimpleDateFormat sdf3 = new SimpleDateFormat("yyyyMMdd");
        String day = sdf3.format(date);

        long badNum = bad.getValue();
        long goodNum = good.getValue();
        long totalNum = badNum + goodNum;
        // Guard against divide-by-zero when the input was empty.
        double badRate = totalNum == 0 ? 0.0 : (double) badNum / totalNum;

        Results results = new Results();
        results.setBad_num(badNum);
        results.setGood_num(goodNum);
        results.setTotal_num(totalNum);
        results.setDay(day);
        results.setBad_rate(badRate);

        Dao dao = new Dao();
        // Fetch once (original queried twice) and accumulate if the day exists.
        Results existing = dao.getDay(day);
        if (existing != null)
        {
            long badTotal = existing.getBad_num() + badNum;
            long goodTotal = existing.getGood_num() + goodNum;
            long grandTotal = existing.getTotal_num() + totalNum;
            existing.setDay(day);
            existing.setBad_num(badTotal);
            existing.setGood_num(goodTotal);
            existing.setTotal_num(grandTotal);
            existing.setBad_rate(grandTotal == 0 ? 0.0 : (double) badTotal / grandTotal);

            dao.update(existing);
        }
        else
        {
            dao.insert(results);
        }

        // NOTE(review): the job is added to the chain only after it already ran;
        // kept as in the original — confirm whether this registration is needed at all.
        jobc.addJob(avroCJob);

        return 0;
    }

    public static void main(String[] args) throws Exception {
//        -Dtask.id=1226 -Dtask.input.dir=/tmp/avro/input_hainiuetl -Dtask.base.dir=/tmp/avro
        System.exit(ToolRunner.run(new Text2AvroJob(), args));
    }

}

放一个

自动化脚本思路同第一个etl项目

直接放代码

yitiaolong.sh

#!/bin/bash
# yitiaolong.sh — daily ETL driver:
#  1) add yesterday's Hive partitions, 2) run the MR jobs,
#  3) copy ORC output under the partition dirs, 4) export top-N to MySQL.
source /etc/profile

# Dates for "yesterday". Original had the quotes misplaced (date -d 1' days ago'),
# which breaks word-splitting; quote the whole -d argument.
# NOTE(review): %y gives a 2-digit year — confirm the partition naming really uses
# 2-digit years and not %Y.
mmdd=$(date -d '1 day ago' +%m%d)
yymm=$(date -d '1 day ago' +%y%m)
dd=$(date -d '1 day ago' +%d)
# Used by the hive->mysql export below; was never defined in the original script.
yymmdd=$(date -d '1 day ago' +%y%m%d)

/usr/local/hive/bin/hive -e "use suyuan09;alter table etlavrosy add if not exists partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetltopics10_orc add if not exists partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetlcategories10_orc add if not exists partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetlspider_orc add if not exists partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetlip_orc add if not exists partition(month='${yymm}',day='${dd}');"


#3-4 run the MR jobs. Hadoop generic options are -D (capital); the commands are
# run directly instead of inside backticks (backticks would try to execute the
# job's stdout as a command).
hdfs_path1=/user/hainiu/data/hainiuetl/input/${yymm}/${dd}
avro_path1=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etltext2avro -Dtask.id=${mmdd} -Dtask.input.dir=${hdfs_path1} -Dtask.base.dir=${avro_path1}

#orctopics10mr.sh

avro_path2=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path2=/user/suyuan09/hainiuetl/orctopics10/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_topic10 -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path2} -Dtask.base.dir=${orc_path2}

#orccategories10mr.sh

avro_path3=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path3=/user/suyuan09/hainiuetl/orccategories10/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_categories10 -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path3} -Dtask.base.dir=${orc_path3}

#orcspidermr.sh

avro_path4=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path4=/user/suyuan09/hainiuetl/orcspider/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_spider -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path4} -Dtask.base.dir=${orc_path4}

#orcipmr.sh

avro_path5=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path5=/user/suyuan09/hainiuetl/orcip/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_ip -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path5} -Dtask.base.dir=${orc_path5}

#orcindex5mr.sh

avro_path6=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path6=/user/suyuan09/hainiuetl/orcindex5/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_index5 -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path6} -Dtask.base.dir=${orc_path6}

# copy ORC output into the Hive partition directories
#orc2etl.sh

/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orctopics10/${yymm}/${dd}/etlavro2orc_topic10_${mmdd}/part-*  hdfs://ns1/user/suyuan09/etlorc/hainiuetltopics10_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orccategories10/${yymm}/${dd}/etlavro2orc_categories10_${mmdd}/part-*  hdfs://ns1/user/suyuan09/etlorc/hainiuetlcategories10_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orcspider/${yymm}/${dd}/etlavro2orc_spider_${mmdd}/part-*  hdfs://ns1/user/suyuan09/etlorc/hainiuetlspider_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orcindex5/${yymm}/${dd}/etlavro2orc_index5_${mmdd}/part-*  hdfs://ns1/user/suyuan09/etlorc/hainiuetlindex5_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orcip/${yymm}/${dd}/etlavro2orc_ip_${mmdd}/part-*  hdfs://ns1/user/suyuan09/etlorc/hainiuetlip_orc/month=${yymm}/day=${dd}


# export the Hive aggregations to flat files
#hive2data.sh

/usr/local/hive/bin/hive  -e "use suyuan09;select t.top,t.num from(select top,count(*) num from hainiuetlindex5_orc group by top) t  sort by t.num desc limit 5;" >  /home/suyuan09/etl/hainiu/orc2mysql/myindex5${yymmdd}
/usr/local/hive/bin/hive  -e "use suyuan09;select t.top,t.num from(select top,count(*) num from hainiuetltopics10_orc group by top) t    sort by t.num desc limit 10;" >    /home/suyuan09/etl/hainiu/orc2mysql/mytopics10${yymmdd}
/usr/local/hive/bin/hive  -e "use suyuan09;select t.top,t.num from(select top,count(*) num from hainiuetlcategories10_orc  group by top) t  sort by t.num desc limit 10;" >   /home/suyuan09/etl/hainiu/orc2mysql/mycategories10${yymmdd}
/usr/local/hive/bin/hive  -e "use suyuan09;select t.uip,t.num from(select uip,count(*) num from hainiuetlip_orc  group by uip) t  sort by t.num desc;" >   /home/suyuan09/etl/hainiu/orc2mysql/myip${yymmdd}
/usr/local/hive/bin/hive  -e "use suyuan09;select t.usagent,t.num from(select usagent,count(*) num  from hainiuetlspider_orc  group by usagent) t   sort by t.num desc;" >  /home/suyuan09/etl/hainiu/orc2mysql/myspider${yymmdd}


# load the flat files into MySQL
#data2mysql.sh

# mysql options: -P (capital) is the port, -D (capital) selects the database;
# lowercase -p is the password.
#mysql -h 172.33.101.123 -P 3306 -u tony -pyourpassword -D yourdbname <<EOF
/bin/mysql -h192.168.88.195 -P3306 -uhainiu -p12345678 -Dhainiutest <<EOF

load data local infile "/home/suyuan09/etl/hainiu/orc2mysql/mytopics10${yymmdd}" into table suyuan09_etl_orctopics10mysql fields terminated by '\t';
load data local infile "/home/suyuan09/etl/hainiu/orc2mysql/mycategories10${yymmdd}" into table suyuan09_etl_orccategories10mysql fields terminated by '\t';
load data local infile "/home/suyuan09/etl/hainiu/orc2mysql/myindex5${yymmdd}" into table suyuan09_etl_orcindex5mysql fields terminated by '\t';
load data local infile "/home/suyuan09/etl/hainiu/orc2mysql/myspider${yymmdd}" into table suyuan09_etl_orcspidermysql fields terminated by '\t';
load data local infile "/home/suyuan09/etl/hainiu/orc2mysql/myip${yymmdd}" into table suyuan09_etl_orcipmysql fields terminated by '\t';

EOF

报表展示

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

 

其中 MySQL(8.0 之前)没有自带的排名(rank)窗口函数,需要自己写一个

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

 

热力图参考之前我之前的博客  其中之前是用 c# 写的,这里用java + spring 改写一下

思路看之前的博客这里放代码

经纬度转换类:LngAndLatUtil

package suyuan.web;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * Geocodes an address string to a (longitude, latitude) pair via the Baidu
 * Geocoder HTTP API.
 */
public class LngAndLatUtil
{
    /**
     * Calls the Baidu geocoder and scrapes the lng/lat out of the JSON reply.
     *
     * @param addr address text to geocode
     * @return a two-element array {lng, lat} of Strings; either element may be
     *         null when the response could not be parsed
     * @throws IOException if closing the response streams fails
     */
    public Object[] getCoordinate(String addr) throws IOException
    {
        String lng = null; // longitude
        String lat = null; // latitude
        String address = null;
        try
        {
            address = java.net.URLEncoder.encode(addr, "UTF-8");
        } catch (UnsupportedEncodingException e1)
        {
            e1.printStackTrace();
        }
        String key = "你的秘钥";
        String url = String.format("http://api.map.baidu.com/geocoder?address=%s&output=json&key=%s", address, key);
        URL myUrl = null;
        try
        {
            myUrl = new URL(url);
        } catch (MalformedURLException e)
        {
            e.printStackTrace();
        }
        InputStreamReader insr = null;
        BufferedReader br = null;
        try
        {
            URLConnection httpsConn = myUrl.openConnection(); // no proxy
            if (httpsConn != null)
            {
                insr = new InputStreamReader(httpsConn.getInputStream(), "UTF-8");
                br = new BufferedReader(insr);
                String data = null;
                int count = 1;
                // Fragile line-position parsing of the pretty-printed JSON:
                // line 5 carries the lng value, line 6 the lat value.
                while ((data = br.readLine()) != null)
                {
                    if (count == 5)
                    {
                        try
                        {
                            lng = (String) data.subSequence(data.indexOf(":") + 1, data.indexOf(","));
                            count++;
                        } catch (StringIndexOutOfBoundsException e)
                        {
                            // Malformed line: leave lng null and keep scanning.
                            e.printStackTrace();
                        }
                    } else if (count == 6)
                    {
                        lat = data.substring(data.indexOf(":") + 1);
                        count++;
                    } else
                    {
                        count++;
                    }
                }
            }
        } catch (IOException e)
        {
            // Best-effort: callers receive {null, null} on network failure.
            e.printStackTrace();
        } finally
        {
            if (insr != null)
            {
                insr.close();
            }
            if (br != null)
            {
                br.close();
            }
        }
        return new Object[] { lng, lat };
    }
}

IpDto:(数据库映射类)

ETL项目2:大数据清洗,处理:使用MapReduce进行离线数据分析并报表显示完整项目

 

package suyuan.entity;

/**
 * Row bean mapped from MySQL table {@code suyuan09_etl_orcipmysql}:
 * {@code top} is the address/area string, {@code num} its hit count.
 * Fields are public for the DbUtils BeanListHandler mapping.
 */
public class IpDto
{
    public String top;

    public Integer num;

    public String getTop()
    {
        return top;
    }

    public void setTop(String top)
    {
        this.top = top;
    }

    public Integer getNum()
    {
        return num;
    }

    public void setNum(Integer num)
    {
        this.num = num;
    }
}

Ip:(热力图 JSON 类)

package suyuan.entity;

/**
 * Heat-map point serialized to JSON for the Baidu Map overlay:
 * longitude/latitude as strings plus a hit count (weight).
 */
public class Ip
{
    public String lng;

    public String lat;

    public int count;

    public String getLng()
    {
        return lng;
    }

    public void setLng(String lng)
    {
        this.lng = lng;
    }

    public String getLat()
    {
        return lat;
    }

    public void setLat(String lat)
    {
        this.lat = lat;
    }

    public int getCount()
    {
        return count;
    }

    public void setCount(int count)
    {
        this.count = count;
    }
}

dao层转换方法

/**
     * Loads the (top, num) rows from MySQL, geocodes each "top" address via
     * LngAndLatUtil, and returns the list of heat-map points.
     *
     * NOTE(review): one HTTP geocoding call per row — fine for small result
     * sets, consider caching if the table grows.
     *
     * @return heat-map points (lng, lat, count), one per DB row
     * @throws SQLException if the query fails
     */
    public List<Ip> getIp() throws SQLException
    {
        List<Ip> ipList = new ArrayList<Ip>();
        // Query the aggregated per-address hit counts.
        String sql = "select top,num from `suyuan09_etl_orcipmysql`";
        List<IpDto> ipDtos = qr.query(sql, new BeanListHandler<IpDto>(IpDto.class));

        for (IpDto dto : ipDtos)
        {
            Ip ip = new Ip();
            LngAndLatUtil geocoder = new LngAndLatUtil();
            Object[] coordinate = null;
            try
            {
                coordinate = geocoder.getCoordinate(dto.getTop());
            } catch (IOException e)
            {
                e.printStackTrace();
            }

            // String.valueOf turns a failed lookup (null element) into "null"
            // rather than throwing here.
            ip.setLng(String.valueOf(coordinate[0]));
            ip.setLat(String.valueOf(coordinate[1]));
            ip.setCount(dto.getNum());
            ipList.add(ip);
        }

        return ipList;
    }

控制器调用返回json:

    /** JSON endpoint consumed by the JSP heat-map page's $.ajax("/getip") call. */
    @RequestMapping("/getip")
    public @ResponseBody List<Ip> getIp() throws Exception {
        return studentService.getIp();
    }

jsp页面显示:

<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt"%>
<!DOCTYPE html>

<html>
<head>
    <meta charset="utf-8">

    <%-- Baidu Map JS API + heat-map library. NOTE(review): the ak= key below was
         lowercased by the scrape — replace with your real (case-sensitive) key. --%>
    <script type="text/javascript" src="http://api.map.baidu.com/api?v=2.0&ak=1freir62vfboc2vgrbnragyutlkgoiih"></script>
    <script type="text/javascript" src="http://api.map.baidu.com/library/heatmap/2.0/src/heatmap_min.js"></script>
    <script type="text/javascript"
        src="${pageContext.request.contextPath}/js/jquery-1.9.1.js"></script>

    <title></title>
    <style type="text/css">
        ul,li{list-style: none;margin:0;padding:0;float:left;}
        html{height:100%}
        body{height:100%;margin:0px;padding:0px;font-family:"微软雅黑";}
        #container{height:700px;width:100%;}
        #r-result{width:100%;}
    </style>
</head>
<body>
    <form id="form1">
    <div>
        <div id="container"></div>
        <div id="r-result">
            <input type="button" onclick="openHeatmap();" value="显示热力图"/><input type="button" onclick="closeHeatmap();" value="关闭热力图"/>
        </div>
    </div>
    </form>
</body>
</html>

<script type="text/javascript">
    var map = new BMap.Map("container");          // create the map instance

    var point = new BMap.Point(118.906886, 31.895532);
    map.centerAndZoom(point, 15);             // init: center point and zoom level
    map.enableScrollWheelZoom();              // allow scroll-wheel zoom

    if (!isSupportCanvas()) {
        alert('热力图目前只支持有canvas支持的浏览器,您所使用的浏览器不能使用热力图功能~')
    }

    var heatmapOverlay = new BMapLib.HeatmapOverlay({ "radius": 20 });
    map.addOverlay(heatmapOverlay);
    // Fetch the points synchronously from the Spring controller and feed the overlay.
    heatmapOverlay.setDataSet({ data: function () {

        var serie = [];
        $.ajax({
            url: "${pageContext.request.contextPath}/getip",

            dataType: "json",
            async: false,
            success: function (dataJson) {
                for (var i = 0; i < dataJson.length; i++) {
                    var item = {
                        lat: dataJson[i].lat,
                        lng: dataJson[i].lng,
                        count: dataJson[i].count
                    };
                    serie.push(item);
                }
            }
        });

        return serie;
    } (), max: 100 });

    // toggle heat-map visibility
    function openHeatmap() {
        heatmapOverlay.show();
    }
    function closeHeatmap() {
        heatmapOverlay.hide();
    }
    closeHeatmap();

    function setGradient() {
        /* expected format:
        {
        0:'rgb(102, 255, 0)',
        .5:'rgb(255, 170, 0)',
        1:'rgb(255, 0, 0)'
        }*/
        var gradient = {};
        var colors = document.querySelectorAll("input[type='color']");
        colors = [].slice.call(colors, 0);
        colors.forEach(function (ele) {
            gradient[ele.getAttribute("data-key")] = ele.value;
        });
        heatmapOverlay.setOptions({ "gradient": gradient });
    }

    // does the browser support canvas (required by the heat-map library)?
    function isSupportCanvas() {
        var elem = document.createElement('canvas');
        return !!(elem.getContext && elem.getContext('2d'));
    }
</script>
       

 

图表也参考我之前的博客