提交 2f13c164 authored 作者: wangmenglong's avatar wangmenglong

增加爬虫

上级 306fdcac
......@@ -258,11 +258,21 @@ public class Recruit extends NameObj {
// exist = false: per the MyBatis-Plus @TableField contract this field is not mapped to a table column.
@TableField(exist = false)
private String recordId;
/**
* Whether this posting has ever been listed (put on the shelf).
**/
@TableField("launch")
private String launch;
/**
* Whether this posting came from a batch import: 0 = no, 1 = yes.
**/
@TableField("batch")
private String batch;
/**
* Whether this posting has already been sent by the batch sender: 0 = no, 1 = yes.
**/
@TableField("batch_send")
private String batchSend;
}
......@@ -7,12 +7,14 @@ import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
import org.springframework.cloud.context.config.annotation.RefreshScope;
import org.springframework.scheduling.annotation.EnableAsync;
@RefreshScope
@EnableDiscoveryClient
@SpringBootApplication(scanBasePackages = {"com.jfb.recruit"})
@MapperScan(basePackages = "com.jfb.recruit.mapper")
@EnableCaching // 开启缓存
@EnableAsync // 开启异步(需保留,用于注入线程池)
public class JfbRecruitApplication {
public static void main(String[] args) {
......
......@@ -53,6 +53,8 @@ public class RecruitController extends BaseController {
**/
@PostMapping("/c")
public BaseResult create(@RequestBody Recruit recruit){
    // A posting created through this endpoint is a single, manual entry:
    // both batch flags are forced to "0" (0 = no) regardless of what the client sent.
    recruit.setBatchSend("0");
    recruit.setBatch("0");

    // Persist, then map the outcome onto the two result codes used by this endpoint.
    boolean created = recruitService.create(recruit);
    if (created) {
        return BaseResult.success("normal_001");
    }
    return BaseResult.error("error_001");
}
......
......@@ -10,10 +10,13 @@ import com.jfb.recruit.util.Excelutils.ExcelAppendUtil;
import com.jfb.recruit.util.Excelutils.JobsExcel;
import com.jfb.recruit.util.Excelutils.OrgExcel;
import com.jfb.recruit.util.Excelutils.OrgExcelAppendUtil;
import com.jfb.recruit.xsnowflake.SnowFlakeFactory;
import data.recruit.Org;
import data.recruit.Recruit;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
......@@ -23,10 +26,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import static sun.font.CreatedFontTracker.MAX_FILE_SIZE;
import java.util.*;
/**
* @author wangmenglong
......@@ -40,6 +40,12 @@ public class ImportRecruitController extends BaseController {
// Used by the JSON import endpoints to create job postings and to call sendAli on them.
@Resource
RecruitService recruitService;
// Supplies ids (nextId) for organisations created during a job import.
@Autowired
SnowFlakeFactory snowFlakeFactory;
// Executor on which the import endpoints run their per-record work in the background.
@Autowired
private ThreadPoolTaskExecutor taskExecutor;
// Used to look organisations up by name (getName) and to create missing ones.
@Resource
OrgService orgService;
......@@ -48,7 +54,7 @@ public class ImportRecruitController extends BaseController {
private static final String EXCEL_FILE_ORG_NAME = "/pchongshuju/showOrg.xlsx";
/**
* 文件上传接口
* 文件上传接口导出excel
* @param file 上传的文件
* @param remark 备注(可选,前端传)
* @return 上传结果
......@@ -126,7 +132,7 @@ public class ImportRecruitController extends BaseController {
}
/**
* 文件上传接口
* 文件上传接口导出excel
* @param file 上传的文件
* @param remark 备注(可选,前端传)
* @return 上传结果
......@@ -269,71 +275,83 @@ public class ImportRecruitController extends BaseController {
@PostMapping(value = "/upload/51job/org/json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public BaseResult uploadJsonOrgFile(HttpServletRequest req,@RequestParam("file") MultipartFile file) {
String regionCode = req.getParameter("regionCode");
String province = req.getParameter("province");
String city = req.getParameter("city");
String region = req.getParameter("region");
// 1. 提交异步任务到线程池(后台执行)
taskExecutor.execute(() -> {
// 1. 校验文件是否为空
if (file.isEmpty()) {
return BaseResult.error("001");
}
String regionCode = req.getParameter("regionCode");
String province = req.getParameter("province");
String city = req.getParameter("city");
String region = req.getParameter("region");
// 2. 校验文件类型(增强严谨性,避免OOXML错误)
String originalFilename = file.getOriginalFilename();
// 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件)
if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) {
return BaseResult.error("002");
}
// 可选:额外校验Content-Type,进一步防止非JSON文件
String contentType = file.getContentType();
if (contentType != null && !contentType.equalsIgnoreCase("application/json")
&& !contentType.equalsIgnoreCase("text/plain")) {
return BaseResult.error("002");
}
// 1. 校验文件是否为空
if (file.isEmpty()) {
System.out.println("文件为空------------------------------------------------------");
}
// 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理)
StringBuilder jsonContent = new StringBuilder();
// 核心:仅用BufferedReader读取文本,不引入任何Office解析逻辑
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8) // 强制UTF-8,避免乱码
)) {
String line;
while ((line = reader.readLine()) != null) {
jsonContent.append(line);
// 2. 校验文件类型(增强严谨性,避免OOXML错误)
String originalFilename = file.getOriginalFilename();
// 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件)
if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) {
System.out.println("格式错误------------------------------------------------------");
}
// 可选:额外校验Content-Type,进一步防止非JSON文件
String contentType = file.getContentType();
if (contentType != null && !contentType.equalsIgnoreCase("application/json")
&& !contentType.equalsIgnoreCase("text/plain")) {
System.out.println("格式错误------------------------------------------------------");
}
JSONArray jsonArray = JSONArray.parseArray(jsonContent.toString());
// 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理)
StringBuilder jsonContent = new StringBuilder();
// 核心:仅用BufferedReader读取文本,不引入任何Office解析逻辑
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8) // 强制UTF-8,避免乱码
)) {
String line;
while ((line = reader.readLine()) != null) {
jsonContent.append(line);
}
JSONArray jsonArray = JSONArray.parseArray(jsonContent.toString());
for (int i = 0; i < jsonArray.size(); i++) {
JSONObject jsonObject = jsonArray.getJSONObject(i);
for (int i = 0; i < jsonArray.size(); i++) {
try {
Thread.sleep(500);
}catch (Exception e){
System.out.println("上传失败------------------------------------------------------");
}
JSONObject jsonObject = jsonArray.getJSONObject(i);
Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName"));
Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName"));
if (orgDetails == null) {
if (orgDetails == null) {
Org org = new Org();
org.setName(jsonObject.getString("name"));
org.setDescr(jsonObject.getString("intro"));
org.setAddress(jsonObject.getString("address"));
org.setRegionCode(regionCode);
org.setRegion(region);
org.setProvince(province);
org.setCity(city);
orgService.create(org);
}
Org org = new Org();
org.setName(jsonObject.getString("name"));
org.setDescr(jsonObject.getString("intro"));
org.setAddress(jsonObject.getString("address"));
org.setRegionCode(regionCode);
org.setRegion(region);
org.setProvince(province);
org.setCity(city);
orgService.create(org);
}
}
System.out.println("上传完成------------------------------------------------------");
// 4. 返回成功响应
return BaseResult.success(jsonArray); // 优化:确保返回字符串类型
} catch (IOException e) {
System.out.println("io错误------------------------------------------------------");
}
} catch (IOException e) {
return BaseResult.error("003");
}
});
return BaseResult.success(); // 优化:确保返回字符串类型
}
/**
......@@ -345,103 +363,159 @@ public class ImportRecruitController extends BaseController {
@PostMapping(value = "/upload/51job/json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public BaseResult uploadJsonFile(HttpServletRequest req,@RequestParam("file") MultipartFile file) {
String recruitTypeId = req.getParameter("recruitTypeId");
String recruitType = req.getParameter("recruitType");
String experienceId = req.getParameter("experienceId");
String experience = req.getParameter("experience");
String qualificationId = req.getParameter("qualificationId");
String qualification = req.getParameter("qualification");
String payTimes = req.getParameter("payTimes");
String startDate = req.getParameter("startDate");
String expiredDate = req.getParameter("expiredDate");
String qualificationCode = req.getParameter("qualificationCode");
String experienceCode = req.getParameter("experienceCode");
String province = req.getParameter("province");
String city = req.getParameter("city");
String region = req.getParameter("region");
String oneIndustryName = req.getParameter("oneIndustryName");
String oneIndustryId = req.getParameter("oneIndustryId");
String towIndustryName = req.getParameter("towIndustryName");
String towIndustryId = req.getParameter("towIndustryId");
String industryCode = req.getParameter("industryCode");
// 1. 提交异步任务到线程池(后台执行)
taskExecutor.execute(() -> {
String recruitTypeId = req.getParameter("recruitTypeId");
String recruitType = req.getParameter("recruitType");
String experienceId = req.getParameter("experienceId");
String experience = req.getParameter("experience");
String qualificationId = req.getParameter("qualificationId");
String qualification = req.getParameter("qualification");
String payTimes = req.getParameter("payTimes");
String startDate = req.getParameter("startDate");
String expiredDate = req.getParameter("expiredDate");
String qualificationCode = req.getParameter("qualificationCode");
String experienceCode = req.getParameter("experienceCode");
String province = req.getParameter("province");
String city = req.getParameter("city");
String region = req.getParameter("region");
String oneIndustryName = req.getParameter("oneIndustryName");
String oneIndustryId = req.getParameter("oneIndustryId");
String towIndustryName = req.getParameter("towIndustryName");
String towIndustryId = req.getParameter("towIndustryId");
String industryCode = req.getParameter("industryCode");
String regionCode = req.getParameter("regionCode");
// 1. 校验文件是否为空
if (file.isEmpty()) {
System.out.println("文件为空------------------------------------------------------");
}
// 1. 校验文件是否为空
if (file.isEmpty()) {
return BaseResult.error("001");
}
// 2. 校验文件类型(增强严谨性,避免OOXML错误)
String originalFilename = file.getOriginalFilename();
// 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件)
if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) {
System.out.println("文件类型错误------------------------------------------------------");
}
// 可选:额外校验Content-Type,进一步防止非JSON文件
String contentType = file.getContentType();
if (contentType != null && !contentType.equalsIgnoreCase("application/json")
&& !contentType.equalsIgnoreCase("text/plain")) {
System.out.println("文件类型错误------------------------------------------------------");
}
// 2. 校验文件类型(增强严谨性,避免OOXML错误)
String originalFilename = file.getOriginalFilename();
// 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件)
if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) {
return BaseResult.error("002");
}
// 可选:额外校验Content-Type,进一步防止非JSON文件
String contentType = file.getContentType();
if (contentType != null && !contentType.equalsIgnoreCase("application/json")
&& !contentType.equalsIgnoreCase("text/plain")) {
return BaseResult.error("002");
}
// 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理)
StringBuilder jsonContent = new StringBuilder();
// 核心:仅用BufferedReader读取文本,不引入任何Office解析逻辑
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8) // 强制UTF-8,避免乱码
)) {
String line;
while ((line = reader.readLine()) != null) {
jsonContent.append(line);
}
// 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理)
StringBuilder jsonContent = new StringBuilder();
// 核心:仅用BufferedReader读取文本,不引入任何Office解析逻辑
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8) // 强制UTF-8,避免乱码
)) {
String line;
while ((line = reader.readLine()) != null) {
jsonContent.append(line);
}
JSONArray jsonArray = JSONArray.parseArray(jsonContent.toString());
for (int i = 0; i < jsonArray.size(); i++) {
JSONObject jsonObject = jsonArray.getJSONObject(i);
Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName"));
if(orgDetails==null){
orgDetails = new Org();
orgDetails.setName(jsonObject.getString("fullCompanyName"));
orgDetails.setDescr(jsonObject.getString("fullCompanyName")+"诚聘");
orgDetails.setRegionCode(regionCode);
orgDetails.setRegion(region);
orgDetails.setProvince(province);
orgDetails.setCity(city);
orgDetails.setId(snowFlakeFactory.nextId());
orgService.create(orgDetails);
}
if(orgDetails!=null&& StringUtils.isNotBlank(orgDetails.getId())){
Recruit recruit = new Recruit();
recruit.setRecruitType(recruitType);
recruit.setRecruitTypeId(recruitTypeId);
recruit.setExperience(experience);
recruit.setExperienceId(experienceId);
recruit.setQualification(qualification);
recruit.setQualificationId(qualificationId);
recruit.setPayTimes(Integer.parseInt(payTimes));
recruit.setStartDate(startDate);
recruit.setExpiredDate(expiredDate);
recruit.setQualificationCode(qualificationCode);
recruit.setExperienceCode(experienceCode);
recruit.setProvince(province);
recruit.setCity(city);
recruit.setRegion(region);
recruit.setOneIndustryName(oneIndustryName);
recruit.setOneIndustryId(oneIndustryId);
recruit.setTowIndustryName(towIndustryName);
recruit.setTowIndustryId(towIndustryId);
recruit.setIndustryCode(industryCode);
recruit.setName(jsonObject.getString("jobName"));
recruit.setDetails(jsonObject.getString("jobDescribe"));
recruit.setSalaryMax(jsonObject.getInteger("jobSalaryMax"));
recruit.setSalaryMin(jsonObject.getInteger("jobSalaryMin"));
recruit.setOrgId(orgDetails.getId());
recruit.setOrgName(orgDetails.getName());
recruit.setBatch("1");
recruit.setBatchSend("0");
recruit.setStatus("up");
recruitService.create(recruit);
try {
Thread.sleep(1000);
recruitService.sendAli(recruit);
}catch (Exception e){
e.printStackTrace();
}
}
}
JSONArray jsonArray = JSONArray.parseArray(jsonContent.toString());
// 4. 返回成功响应
System.out.println("完成导入------------------------------------------------------");
for (int i = 0; i < jsonArray.size(); i++) {
} catch (IOException e) {
System.out.println("io异常------------------------------------------------------");
}
JSONObject jsonObject = jsonArray.getJSONObject(i);
});
Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName"));
if(orgDetails!=null&& StringUtils.isNotBlank(orgDetails.getId())){
Recruit recruit = new Recruit();
recruit.setRecruitType(recruitType);
recruit.setRecruitTypeId(recruitTypeId);
recruit.setExperience(experience);
recruit.setExperienceId(experienceId);
recruit.setQualification(qualification);
recruit.setQualificationId(qualificationId);
recruit.setPayTimes(Integer.parseInt(payTimes));
recruit.setStartDate(startDate);
recruit.setExpiredDate(expiredDate);
recruit.setQualificationCode(qualificationCode);
recruit.setExperienceCode(experienceCode);
recruit.setProvince(province);
recruit.setCity(city);
recruit.setRegion(region);
recruit.setOneIndustryName(oneIndustryName);
recruit.setOneIndustryId(oneIndustryId);
recruit.setTowIndustryName(towIndustryName);
recruit.setTowIndustryId(towIndustryId);
recruit.setIndustryCode(industryCode);
recruit.setName(jsonObject.getString("jobName"));
recruit.setDetails(jsonObject.getString("jobDescribe"));
recruit.setSalaryMax(jsonObject.getInteger("jobSalaryMax"));
recruit.setSalaryMin(jsonObject.getInteger("jobSalaryMin"));
recruit.setOrgId(orgDetails.getId());
recruit.setOrgName(orgDetails.getName());
recruitService.create(recruit);
return BaseResult.success(); // 优化:确保返回字符串类型
}
}
}
public static void main(String[] args) {
// 原始带重复元素的字符串数组
String[] originalArray = {
"https://jobs.51job.com/all/coVToBZQVjDzQEaFE2UT0.html",
"https://jobs.51job.com/all/coVToBZQVjDzQEaFE2UT0.html"
};
// 4. 返回成功响应
return BaseResult.success(jsonArray); // 优化:确保返回字符串类型
// 1. 将String数组转为LinkedHashSet,自动去重且保留顺序
Set<String> stringSet = new LinkedHashSet<>(Arrays.asList(originalArray));
// 若不需要顺序,可替换为HashSet(效率略高):new HashSet<>(Arrays.asList(originalArray))
} catch (IOException e) {
return BaseResult.error("003");
// 2. 将Set转回String数组
String[] deduplicatedArray = stringSet.toArray(new String[0]);
// 打印去重结果
System.out.println("去重后的String数组:" + Arrays.toString(deduplicatedArray));
// 2. 逐个打印元素(能看到真实的字符串内容,包含引号的本质)
System.out.println("逐个打印元素:");
for (String str : deduplicatedArray) {
// 手动加引号,证明元素本身是字符串
System.out.println("\"" + str + "\",");
}
}
......
......@@ -69,7 +69,9 @@ public class OrgServiceImpl extends ServiceImpl<OrgMapper, Org> implements OrgSe
UserContext userContext = UserUtils.getUserContext();
String baseCode = userContext.getBaseCode();
org.setId(snowFlakeFactory.nextId());
if(StringUtils.isBlank(org.getId())){
org.setId(snowFlakeFactory.nextId());
}
org.setBaseCode(baseCode);
org.setDel(false);
org.setCreateTime(DateUtil.now());
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论