Sparrow算法篇 从日期取交集到思维模式-2
程序员文章站
2022-03-05 10:47:17
...
接上一篇
- 这样的时间段有成百上千条该如何处理?
- 如果我们需要根据具有日期交集的时间段分组呢?
- 如果我们的业务不是日期,而是其他数据类型呢?如何抽象出计算模型?非日期型数据也可以进行分组?
上一篇分享日期取交集的核心逻辑。 但映射到具体业务上可能有更复杂的场景,比如第一个问题,两个日期取交集还好搞好,但日期段很多的情况下,如何按每一个时间段相同的数据进行分组呢。
即每两个红点之间的日期不能出现断点,要么没有交集,有交集就一定是连续的。 所以解决这个问题的第一步就是如何确定红点(这一点很重要,凭直觉,遵照人脑思维落点,如果根据日期探测,逻辑将变得非常困难)。
- 第一步确定之后,我们根据所有日期段的开始和截止点进行坐标落点
- 第二步根据坐标点,确定我们将要拆分的日期段组。
- 第三步 日期段组的起始时间确定后,再依次根据日期取交集,即可实现多日期段下的日期交集逻辑。
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sparrow.core.algorithm.gouping;
import java.util.*;
/**
* E 为段类型 D为点类型
* @author harry
*/
public class Coordinate<E extends Segment<D>, D extends Comparable> {
public Coordinate(List<E> dataList) {
this.dataList = dataList;
}
private List<E> dataList = new ArrayList<E>();
protected List<Segment<D>> segments = new ArrayList<Segment<D>>();
protected List<Point<D>> coordinate = new ArrayList<Point<D>>();
public List<Point<D>> getCoordinate() {
return coordinate;
}
//画从标点
public void draw() {
Map<Comparable, Point<D>> coordinate = new TreeMap<Comparable, Point<D>>();
for (E segment : this.dataList) {
Point<D> point = coordinate.get(segment.getStart().getPoint());
if (point == null) {
point = new Point<D>(segment.getStart().getPoint(), true, null);
coordinate.put(segment.getStart().getPoint(), point);
}
point = coordinate.get(segment.getEnd().getPoint());
if (point == null) {
point = new Point<D>(segment.getEnd().getPoint(), null, true);
coordinate.put(segment.getEnd().getPoint(), point);
}
}
this.coordinate = new ArrayList<Point<D>>(coordinate.values());
}
//分组,可被重写实现
public void section() {
for (int i = 0; i < this.coordinate.size() - 1; i++) {
Point<D> current = this.coordinate.get(i);
Point<D> next = this.coordinate.get(i + 1);
this.segments.add(new Segment<D>(current, next));
}
}
//对每组数据聚合操作
public Map<Segment<D>, List<E>> aggregation() {
Map<Segment<D>, List<E>> segmentMap = new HashMap<Segment<D>, List<E>>();
for (Segment<D> segment : this.segments) {
segmentMap.put(segment, this.aggregation(segment));
}
return segmentMap;
}
private List<E> aggregation(Segment<D> s) {
List<E> segments = new ArrayList<E>();
for (E segment : this.dataList) {
if (segment.getStart().getPoint().compareTo(s.getStart().getPoint()) > 0) {
continue;
}
if (segment.getEnd().getPoint().compareTo(s.getStart().getPoint()) > 0) {
segments.add(segment);
}
}
return segments;
}
public List<Segment<D>> getSegments() {
return segments;
}
}
类封装之后可供上层业务端调用,为考虑其他业务场景,非日期类型数据的取交集我们将日期段类定义为泛型(类型参数化)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sparrow.core.algorithm.gouping;
/**
* @author by harry
*/
public class Segment<T extends Comparable> {
private Point<T> start;
private Point<T> end;
public Segment(T start, T end) {
if (end.compareTo(start) < 0) {
throw new IllegalArgumentException("this.end<this.start");
}
this.start = new Point<T>(start);
this.end = new Point<T>(end);
}
public Segment(Point<T> start, Point<T> end) {
if (end.getPoint().compareTo(start.getPoint()) < 0) {
throw new IllegalArgumentException("end=" + end.getPoint() + "< start=" + start.getPoint());
}
this.start = start;
this.end = end;
}
public Point<T> getStart() {
return start;
}
public void setStart(Point<T> start) {
this.start = start;
}
public Point<T> getEnd() {
return end;
}
public void setEnd(Point<T> end) {
this.end = end;
}
public boolean equals(Segment<T> segment) {
if (this.start.getPoint().compareTo(segment.start.getPoint()) != 0) {
return false;
}
return this.end.getPoint().compareTo(segment.end.getPoint()) == 0;
}
public Segment<T> intersection(Segment<T> segment) {
if (segment.getEnd().getPoint().compareTo(segment.getStart().getPoint()) < 0) {
throw new IllegalArgumentException("segment.end < segment.start");
}
//取大的起始节点
Point<T> start = this.start.getPoint().compareTo(segment.start.getPoint()) > 0 ? this.start : segment.start;
//取小的截止节点
Point<T> end = this.end.getPoint().compareTo(segment.end.getPoint()) < 0 ? this.end : segment.end;
if (start.getPoint().compareTo(end.getPoint()) <= 0) {
return new Segment(start, end);
}
return null;
}
public Segment union(Segment<T> segment) {
if (segment.getEnd().getPoint().compareTo(segment.getStart().getPoint()) < 0) {
throw new IllegalArgumentException("segment.end < segment.start");
}
//取小的起始节点
Point<T> start = this.start.getPoint().compareTo(segment.start.getPoint()) < 0 ? this.start : segment.start;
//取大的截止节点
Point<T> end = this.end.getPoint().compareTo(segment.end.getPoint()) > 0 ? this.end : segment.end;
if (start.getPoint().compareTo(end.getPoint()) <= 0) {
return new Segment<T>(start, end);
}
return null;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
Segment<?> segment = (Segment<?>) o;
return start.equals(segment.start) && end.equals(segment.end);
}
@Override
public int hashCode() {
int result = start.hashCode();
result = 31 * result + end.hashCode();
return result;
}
@Override
public String toString() {
return "Segment{" +
"start=" + start.getPoint() +
", end=" + end.getPoint() +
'}';
}
}
如果segment还有其他的属性,可以继承实现segment类
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sparrow.facade.segment;
import com.sparrow.constant.DATE_TIME;
import com.sparrow.core.algorithm.gouping.Segment;
import com.sparrow.utility.DateTimeUtility;
/**
* @author harry
*/
public class BusinessSegment extends Segment<Long> {
public BusinessSegment(Long id, String type, Long start, Long end) {
super(start, end);
this.id = id;
this.type = type;
}
//自定义属 性
private Long id;
//自定义属 性
private String type;
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
@Override
public String toString() {
String start=DateTimeUtility.getFormatTime((Long) this.getStart().getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD);
String end=DateTimeUtility.getFormatTime((Long)this.getEnd().getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD);
return "cat-record{" +
"id=" + id +
", type='" + type + '\'' +
"} " + start+"-"+end;
}
}
具体业务的分组逻辑可能不同,继承重写section方法即可
static class IntegerCoordinate extends Coordinate<BusinessSegment, Long> {
public IntegerCoordinate(List<BusinessSegment> dataList) {
super(dataList);
}
@Override
public void section() {
for (Point<Long> point : this.coordinate) {
System.out.println(DateTimeUtility.getFormatTime(point.getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS));
}
for (int i = 0; i < this.coordinate.size() - 1; i++) {
Point<Long> current = this.coordinate.get(i);
Point<Long> start = Point.copy(current);
Calendar calendar = Calendar.getInstance();
calendar.setTimeInMillis(start.getPoint());
if (calendar.get(Calendar.HOUR_OF_DAY) == 23) {
calendar.add(Calendar.SECOND, 1);
start.setPoint(calendar.getTimeInMillis());
}
Point<Long> next = this.coordinate.get(i + 1);
Point<Long> end = Point.copy(next);
if (DateTimeUtility.getInterval(start.getPoint(), next.getPoint(), DATE_TIME_UNIT.SECOND) <= 5) {
i++;
continue;
}
calendar.setTimeInMillis(end.getPoint());
if (calendar.get(Calendar.HOUR_OF_DAY) == 0) {
calendar.add(Calendar.SECOND, -1);
end.setPoint(calendar.getTimeInMillis());
}
System.out.println(DateTimeUtility.getFormatTime(start.getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS) + "-" +
DateTimeUtility.getFormatTime(end.getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS));
this.segments.add(new Segment<Long>(start, end));
}
}
}
demo示例
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sparrow.facade.segment;
import com.sparrow.constant.DATE_TIME;
import com.sparrow.core.algorithm.gouping.Point;
import com.sparrow.core.algorithm.gouping.Segment;
import com.sparrow.core.algorithm.gouping.Coordinate;
import com.sparrow.enums.DATE_TIME_UNIT;
import com.sparrow.utility.DateTimeUtility;
import java.util.*;
/**
* @author harry
*/
public class Main {
static class IntegerCoordinate extends Coordinate<BusinessSegment, Long> {
public IntegerCoordinate(List<BusinessSegment> dataList) {
super(dataList);
}
@Override
public void section() {
for (Point<Long> point : this.coordinate) {
System.out.println(DateTimeUtility.getFormatTime(point.getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS));
}
for (int i = 0; i < this.coordinate.size() - 1; i++) {
Point<Long> current = this.coordinate.get(i);
Point<Long> start = Point.copy(current);
Calendar calendar = Calendar.getInstance();
calendar.setTimeInMillis(start.getPoint());
if (calendar.get(Calendar.HOUR_OF_DAY) == 23) {
calendar.add(Calendar.SECOND, 1);
start.setPoint(calendar.getTimeInMillis());
}
Point<Long> next = this.coordinate.get(i + 1);
Point<Long> end = Point.copy(next);
if (DateTimeUtility.getInterval(start.getPoint(), next.getPoint(), DATE_TIME_UNIT.SECOND) <= 5) {
i++;
continue;
}
calendar.setTimeInMillis(end.getPoint());
if (calendar.get(Calendar.HOUR_OF_DAY) == 0) {
calendar.add(Calendar.SECOND, -1);
end.setPoint(calendar.getTimeInMillis());
}
System.out.println(DateTimeUtility.getFormatTime(start.getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS) + "-" +
DateTimeUtility.getFormatTime(end.getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS));
this.segments.add(new Segment<Long>(start, end));
}
}
}
enum SEGMENT_TYPE {
TYPE_001,
TYPE_002,
TYPE_003,
TYPE_004,
TYPE_005,
TYPE_006,
TYPE_007,
TYPE_008,
TYPE_009,
TYPE_010
}
public static void main(String[] args) {
List<BusinessSegment> list = new ArrayList<BusinessSegment>();
//Long id, String type, Integer start, Integer end
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_001.name(), DateTimeUtility.parse("2018-02-05 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-11 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_002.name(), DateTimeUtility.parse("2018-02-06 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-10 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_004.name(), DateTimeUtility.parse("2018-02-07 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-09 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_005.name(), DateTimeUtility.parse("2018-02-08 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-08 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_006.name(), DateTimeUtility.parse("2018-02-05 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-19 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_007.name(), DateTimeUtility.parse("2018-02-20 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-22 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_008.name(), DateTimeUtility.parse("2018-02-21 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-23 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_009.name(), DateTimeUtility.parse("2018-02-22 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-24 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_010.name(), DateTimeUtility.parse("2018-02-23 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-25 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
list.add(new BusinessSegment(1L, SEGMENT_TYPE.TYPE_010.name(), DateTimeUtility.parse("2018-02-25 00:00:00", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS), DateTimeUtility.parse("2018-02-28 23:59:59", DATE_TIME.FORMAT_YYYY_MM_DD_HH_MM_SS)));
Coordinate<BusinessSegment, Long> coordinate = new IntegerCoordinate(list);
coordinate.draw();
coordinate.section();
Map<Segment<Long>, List<BusinessSegment>> map = coordinate.aggregation();
for (Segment segment : coordinate.getSegments()) {
System.out.print(DateTimeUtility.getFormatTime((Long) segment.getStart().getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD) + "-");
System.out.println(DateTimeUtility.getFormatTime((Long) segment.getEnd().getPoint(), DATE_TIME.FORMAT_YYYY_MM_DD));
System.out.println(map.get(segment));
}
}
}
示例运行结果
2018-02-05->2018-02-05
[cat-record{id=1, type='TYPE_001'} 2018-02-05-2018-02-11, cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-06->2018-02-06
[cat-record{id=1, type='TYPE_001'} 2018-02-05-2018-02-11, cat-record{id=1, type='TYPE_002'} 2018-02-06-2018-02-10, cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-07->2018-02-07
[cat-record{id=1, type='TYPE_001'} 2018-02-05-2018-02-11, cat-record{id=1, type='TYPE_002'} 2018-02-06-2018-02-10, cat-record{id=1, type='TYPE_004'} 2018-02-07-2018-02-09, cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-08->2018-02-08
[cat-record{id=1, type='TYPE_001'} 2018-02-05-2018-02-11, cat-record{id=1, type='TYPE_002'} 2018-02-06-2018-02-10, cat-record{id=1, type='TYPE_004'} 2018-02-07-2018-02-09, cat-record{id=1, type='TYPE_005'} 2018-02-08-2018-02-08, cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-09->2018-02-09
[cat-record{id=1, type='TYPE_001'} 2018-02-05-2018-02-11, cat-record{id=1, type='TYPE_002'} 2018-02-06-2018-02-10, cat-record{id=1, type='TYPE_004'} 2018-02-07-2018-02-09, cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-10->2018-02-10
[cat-record{id=1, type='TYPE_001'} 2018-02-05-2018-02-11, cat-record{id=1, type='TYPE_002'} 2018-02-06-2018-02-10, cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-11->2018-02-11
[cat-record{id=1, type='TYPE_001'} 2018-02-05-2018-02-11, cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-12->2018-02-19
[cat-record{id=1, type='TYPE_006'} 2018-02-05-2018-02-19]
2018-02-21->2018-02-21
[cat-record{id=1, type='TYPE_007'} 2018-02-20-2018-02-22, cat-record{id=1, type='TYPE_008'} 2018-02-21-2018-02-23]
2018-02-22->2018-02-22
[cat-record{id=1, type='TYPE_007'} 2018-02-20-2018-02-22, cat-record{id=1, type='TYPE_008'} 2018-02-21-2018-02-23, cat-record{id=1, type='TYPE_009'} 2018-02-22-2018-02-24]
2018-02-24->2018-02-24
[cat-record{id=1, type='TYPE_009'} 2018-02-22-2018-02-24, cat-record{id=1, type='TYPE_010'} 2018-02-23-2018-02-25]
2018-02-26->2018-02-28
[cat-record{id=1, type='TYPE_010'} 2018-02-25-2018-02-28]
完整示例代码下载 https://github.com/sparrowzoo/sparrow
sparrow-test项目下的
com.sparrow.facade.segmen.Main方法