|
|
@@ -29,6 +29,7 @@ import org.springframework.stereotype.Service;
|
|
29
|
29
|
import org.springframework.transaction.annotation.Transactional;
|
|
30
|
30
|
|
|
31
|
31
|
import javax.annotation.Resource;
|
|
|
32
|
+import java.util.ArrayList;
|
|
32
|
33
|
import java.util.Date;
|
|
33
|
34
|
import java.util.List;
|
|
34
|
35
|
|
|
|
@@ -63,6 +64,7 @@ public class BizSinQinghaiNewsServiceImpl extends ServiceImpl<BizSinQinghaiNewsM
|
|
63
|
64
|
final HtmlPage page=webClient.getPage(newsUrl.getQinghai()); //从指定URL获取HtmlPage
|
|
64
|
65
|
List<HtmlParagraph> divList=page.getByXPath("//p[@class='item']");
|
|
65
|
66
|
int i = 0;
|
|
|
67
|
+ List<BizSinQinghaiNews> list1 = new ArrayList<>();
|
|
66
|
68
|
for (HtmlParagraph node : divList) {
|
|
67
|
69
|
if (i==8){ //只抓取最近8条数
|
|
68
|
70
|
break;
|
|
|
@@ -73,13 +75,14 @@ public class BizSinQinghaiNewsServiceImpl extends ServiceImpl<BizSinQinghaiNewsM
|
|
73
|
75
|
continue;
|
|
74
|
76
|
}
|
|
75
|
77
|
//先查询是否有相同标题内容,存在跳过
|
|
76
|
|
- QueryWrapper<BizSinQinghaiNews> queryWrapper = new QueryWrapper<>();
|
|
77
|
|
- queryWrapper.lambda().eq(BizSinQinghaiNews::getTitle,anchor.getAttribute("title"));
|
|
78
|
|
- List<BizSinQinghaiNews> list = this.list(queryWrapper);
|
|
79
|
|
- if (!list.isEmpty()){ //表示此内容已经存在
|
|
80
|
|
- continue;
|
|
81
|
|
- }
|
|
82
|
|
- i++;
|
|
|
78
|
+// QueryWrapper<BizSinQinghaiNews> queryWrapper = new QueryWrapper<>();
|
|
|
79
|
+// queryWrapper.lambda()
|
|
|
80
|
+// .eq(BizSinQinghaiNews::getTitle,anchor.getAttribute("title"))
|
|
|
81
|
+// .eq(BizSinQinghaiNews::getSource,1); //来源
|
|
|
82
|
+// List<BizSinQinghaiNews> list = this.list(queryWrapper);
|
|
|
83
|
+// if (!list.isEmpty()){ //表示此内容已经存在
|
|
|
84
|
+// continue;
|
|
|
85
|
+// }
|
|
83
|
86
|
BizSinQinghaiNews bizSinQinghaiNews = new BizSinQinghaiNews();
|
|
84
|
87
|
//发布日期
|
|
85
|
88
|
String[] string = span.getTextContent().split("]");
|
|
|
@@ -90,11 +93,20 @@ public class BizSinQinghaiNewsServiceImpl extends ServiceImpl<BizSinQinghaiNewsM
|
|
90
|
93
|
bizSinQinghaiNews.setTitle(anchor.getAttribute("title").trim());
|
|
91
|
94
|
bizSinQinghaiNews.setRedirectUrl(anchor.getAttribute("href").trim());
|
|
92
|
95
|
bizSinQinghaiNews.setIsRedirect(1);
|
|
|
96
|
+ bizSinQinghaiNews.setSource(1); //来源
|
|
93
|
97
|
bizSinQinghaiNews.setStatus(2); //默认使用提交状态
|
|
94
|
|
- super.save(bizSinQinghaiNews);
|
|
95
|
|
-
|
|
|
98
|
+ list1.add(bizSinQinghaiNews);
|
|
|
99
|
+ i++;
|
|
96
|
100
|
}
|
|
|
101
|
+ if (list1.size() >0) {
|
|
|
102
|
+ //清理抓取数据,只保留今天抓取数据
|
|
|
103
|
+ QueryWrapper<BizSinQinghaiNews> queryWrapper = new QueryWrapper<>();
|
|
|
104
|
+ queryWrapper.lambda()
|
|
|
105
|
+ .eq(BizSinQinghaiNews::getSource, 1); //来源
|
|
97
|
106
|
|
|
|
107
|
+ this.remove(queryWrapper);
|
|
|
108
|
+ this.saveOrUpdateBatch(list1);
|
|
|
109
|
+ }
|
|
98
|
110
|
} catch (Exception e) {
|
|
99
|
111
|
e.printStackTrace();
|
|
100
|
112
|
}finally {
|