101.Spark大型电商项目-各区域热门商品统计-查询用户指定日期范围内的点击行为数据
程序员文章站
2022-05-14 21:33:47
...
目录
本篇文章记录各区域热门商品统计-查询用户指定日期范围内的点击行为数据。
代码
spark.product
AreaTop3ProductSpark.java
package graduation.java.spark.product; import com.alibaba.fastjson.JSONObject; import graduation.java.constant.Constants; import graduation.java.dao.ITaskDAO; import graduation.java.domain.Task; import graduation.java.factory.DAOFactory; import graduation.java.util.ParamUtils; import graduation.java.util.SparkUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; /** * FileName: AreaTop3ProductSpark * Author: hadoop * Email: [email protected] * Date: 19-4-1 下午4:17 * Description: * * 各区域top3热门商品统计Spark作业 */ public class AreaTop3ProductSpark { public static void main(String[] args) { //1.创建SparkConf SparkConf conf = new SparkConf() .setAppName(Constants.SPARK_APP_NAME_PRODUCT); SparkUtils.setMaster(conf); //2.构建spark上下文、 JavaSparkContext sc = new JavaSparkContext(conf); SQLContext sqlContext = SparkUtils.getSQLContext(sc.sc()); //3.模拟数据 SparkUtils.mockData(sc,sqlContext); //4.获取命令行传入的taskid,查询对应的任务参数 ITaskDAO iTaskDAO = DAOFactory.getTaskDAO(); long taskid = ParamUtils.getTaskIdFromArgs(args,Constants.SPARK_LOCAL_TASKID_PRODUCT); Task task = iTaskDAO.findById(taskid); JSONObject taskParam = JSONObject.parseObject(task.getTaskParam()); String startDate = ParamUtils.getParam(taskParam,Constants.PARAM_START_DATE); String endDate = ParamUtils.getParam(taskParam,Constants.PARAM_END_DATE); JavaRDD<Row> clickActionRDD = getClickActionRDDByDate(sqlContext,startDate,endDate); sc.close(); } /**查询指定日期范围内的点击行为数据 * * @param sqlContext * @param startDate 起始日期 * @param endDate 结束日期 * @return */ private static JavaRDD<Row> getClickActionRDDByDate(SQLContext sqlContext, String startDate, String endDate) { // 从user_visit_action中,查询用户访问行为数据 // 第一个限定:click_product_id,限定为不为空的访问行为,那么就代表着点击行为 // 第二个限定:在用户指定的日期范围内的数据 String sql = "SELECT " + "city_id," + "click_product_id product_id " + "FROM user_visit_action " + "WHERE click_product_id IS NOT NULL " + "AND click_product_id != 'NULL' " + "AND click_product_id != 'null' " + "AND action_time >='" + startDate + "' " + "AND action_time <= '"+ endDate + "'" ; Dataset clickActionDs = sqlContext.sql(sql); return clickActionDs.javaRDD(); } }