提交 2f13c164 authored 作者: wangmenglong's avatar wangmenglong

增加爬虫

上级 306fdcac
...@@ -258,11 +258,21 @@ public class Recruit extends NameObj { ...@@ -258,11 +258,21 @@ public class Recruit extends NameObj {
@TableField(exist = false) @TableField(exist = false)
private String recordId; private String recordId;
/** /**
* 是否上架过 * 是否上架过
**/ **/
@TableField("launch") @TableField("launch")
private String launch; private String launch;
/**
* Whether this record is a batch record: 0 = no, 1 = yes.
**/
@TableField("batch")
private String batch;
/**
* Whether this record has already been batch-sent: 0 = no, 1 = yes.
**/
@TableField("batch_send")
private String batchSend;
} }
...@@ -7,12 +7,14 @@ import org.springframework.boot.autoconfigure.SpringBootApplication; ...@@ -7,12 +7,14 @@ import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cache.annotation.EnableCaching; import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cloud.client.discovery.EnableDiscoveryClient; import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
import org.springframework.cloud.context.config.annotation.RefreshScope; import org.springframework.cloud.context.config.annotation.RefreshScope;
import org.springframework.scheduling.annotation.EnableAsync;
@RefreshScope @RefreshScope
@EnableDiscoveryClient @EnableDiscoveryClient
@SpringBootApplication(scanBasePackages = {"com.jfb.recruit"}) @SpringBootApplication(scanBasePackages = {"com.jfb.recruit"})
@MapperScan(basePackages = "com.jfb.recruit.mapper") @MapperScan(basePackages = "com.jfb.recruit.mapper")
@EnableCaching // 开启缓存 @EnableCaching // 开启缓存
@EnableAsync // 开启异步(需保留,用于注入线程池)
public class JfbRecruitApplication { public class JfbRecruitApplication {
public static void main(String[] args) { public static void main(String[] args) {
......
...@@ -53,6 +53,8 @@ public class RecruitController extends BaseController { ...@@ -53,6 +53,8 @@ public class RecruitController extends BaseController {
**/ **/
@PostMapping("/c") @PostMapping("/c")
public BaseResult create(@RequestBody Recruit recruit){ public BaseResult create(@RequestBody Recruit recruit){
recruit.setBatch("0");
recruit.setBatchSend("0");
return recruitService.create(recruit)?BaseResult.success("normal_001"):BaseResult.error("error_001"); return recruitService.create(recruit)?BaseResult.success("normal_001"):BaseResult.error("error_001");
} }
......
...@@ -10,10 +10,13 @@ import com.jfb.recruit.util.Excelutils.ExcelAppendUtil; ...@@ -10,10 +10,13 @@ import com.jfb.recruit.util.Excelutils.ExcelAppendUtil;
import com.jfb.recruit.util.Excelutils.JobsExcel; import com.jfb.recruit.util.Excelutils.JobsExcel;
import com.jfb.recruit.util.Excelutils.OrgExcel; import com.jfb.recruit.util.Excelutils.OrgExcel;
import com.jfb.recruit.util.Excelutils.OrgExcelAppendUtil; import com.jfb.recruit.util.Excelutils.OrgExcelAppendUtil;
import com.jfb.recruit.xsnowflake.SnowFlakeFactory;
import data.recruit.Org; import data.recruit.Org;
import data.recruit.Recruit; import data.recruit.Recruit;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType; import org.springframework.http.MediaType;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.web.bind.annotation.*; import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
...@@ -23,10 +26,7 @@ import java.io.BufferedReader; ...@@ -23,10 +26,7 @@ import java.io.BufferedReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.*;
import java.util.List;
import static sun.font.CreatedFontTracker.MAX_FILE_SIZE;
/** /**
* @author wangmenglong * @author wangmenglong
...@@ -40,6 +40,12 @@ public class ImportRecruitController extends BaseController { ...@@ -40,6 +40,12 @@ public class ImportRecruitController extends BaseController {
@Resource @Resource
RecruitService recruitService; RecruitService recruitService;
@Autowired
SnowFlakeFactory snowFlakeFactory;
@Autowired
private ThreadPoolTaskExecutor taskExecutor;
@Resource @Resource
OrgService orgService; OrgService orgService;
...@@ -48,7 +54,7 @@ public class ImportRecruitController extends BaseController { ...@@ -48,7 +54,7 @@ public class ImportRecruitController extends BaseController {
private static final String EXCEL_FILE_ORG_NAME = "/pchongshuju/showOrg.xlsx"; private static final String EXCEL_FILE_ORG_NAME = "/pchongshuju/showOrg.xlsx";
/** /**
* 文件上传接口 * 文件上传接口导出excel
* @param file 上传的文件 * @param file 上传的文件
* @param remark 备注(可选,前端传) * @param remark 备注(可选,前端传)
* @return 上传结果 * @return 上传结果
...@@ -126,7 +132,7 @@ public class ImportRecruitController extends BaseController { ...@@ -126,7 +132,7 @@ public class ImportRecruitController extends BaseController {
} }
/** /**
* 文件上传接口 * 文件上传接口导出excel
* @param file 上传的文件 * @param file 上传的文件
* @param remark 备注(可选,前端传) * @param remark 备注(可选,前端传)
* @return 上传结果 * @return 上传结果
...@@ -269,6 +275,9 @@ public class ImportRecruitController extends BaseController { ...@@ -269,6 +275,9 @@ public class ImportRecruitController extends BaseController {
@PostMapping(value = "/upload/51job/org/json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @PostMapping(value = "/upload/51job/org/json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public BaseResult uploadJsonOrgFile(HttpServletRequest req,@RequestParam("file") MultipartFile file) { public BaseResult uploadJsonOrgFile(HttpServletRequest req,@RequestParam("file") MultipartFile file) {
// 1. 提交异步任务到线程池(后台执行)
taskExecutor.execute(() -> {
String regionCode = req.getParameter("regionCode"); String regionCode = req.getParameter("regionCode");
String province = req.getParameter("province"); String province = req.getParameter("province");
String city = req.getParameter("city"); String city = req.getParameter("city");
...@@ -276,20 +285,20 @@ public class ImportRecruitController extends BaseController { ...@@ -276,20 +285,20 @@ public class ImportRecruitController extends BaseController {
// 1. 校验文件是否为空 // 1. 校验文件是否为空
if (file.isEmpty()) { if (file.isEmpty()) {
return BaseResult.error("001"); System.out.println("文件为空------------------------------------------------------");
} }
// 2. 校验文件类型(增强严谨性,避免OOXML错误) // 2. 校验文件类型(增强严谨性,避免OOXML错误)
String originalFilename = file.getOriginalFilename(); String originalFilename = file.getOriginalFilename();
// 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件) // 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件)
if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) { if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) {
return BaseResult.error("002"); System.out.println("格式错误------------------------------------------------------");
} }
// 可选:额外校验Content-Type,进一步防止非JSON文件 // 可选:额外校验Content-Type,进一步防止非JSON文件
String contentType = file.getContentType(); String contentType = file.getContentType();
if (contentType != null && !contentType.equalsIgnoreCase("application/json") if (contentType != null && !contentType.equalsIgnoreCase("application/json")
&& !contentType.equalsIgnoreCase("text/plain")) { && !contentType.equalsIgnoreCase("text/plain")) {
return BaseResult.error("002"); System.out.println("格式错误------------------------------------------------------");
} }
// 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理) // 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理)
...@@ -308,6 +317,12 @@ public class ImportRecruitController extends BaseController { ...@@ -308,6 +317,12 @@ public class ImportRecruitController extends BaseController {
for (int i = 0; i < jsonArray.size(); i++) { for (int i = 0; i < jsonArray.size(); i++) {
try {
Thread.sleep(500);
}catch (Exception e){
System.out.println("上传失败------------------------------------------------------");
}
JSONObject jsonObject = jsonArray.getJSONObject(i); JSONObject jsonObject = jsonArray.getJSONObject(i);
Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName")); Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName"));
...@@ -328,12 +343,15 @@ public class ImportRecruitController extends BaseController { ...@@ -328,12 +343,15 @@ public class ImportRecruitController extends BaseController {
} }
// 4. 返回成功响应 System.out.println("上传完成------------------------------------------------------");
return BaseResult.success(jsonArray); // 优化:确保返回字符串类型
} catch (IOException e) { } catch (IOException e) {
return BaseResult.error("003"); System.out.println("io错误------------------------------------------------------");
} }
});
return BaseResult.success(); // 优化:确保返回字符串类型
} }
/** /**
...@@ -345,6 +363,9 @@ public class ImportRecruitController extends BaseController { ...@@ -345,6 +363,9 @@ public class ImportRecruitController extends BaseController {
@PostMapping(value = "/upload/51job/json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @PostMapping(value = "/upload/51job/json", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public BaseResult uploadJsonFile(HttpServletRequest req,@RequestParam("file") MultipartFile file) { public BaseResult uploadJsonFile(HttpServletRequest req,@RequestParam("file") MultipartFile file) {
// 1. 提交异步任务到线程池(后台执行)
taskExecutor.execute(() -> {
String recruitTypeId = req.getParameter("recruitTypeId"); String recruitTypeId = req.getParameter("recruitTypeId");
String recruitType = req.getParameter("recruitType"); String recruitType = req.getParameter("recruitType");
String experienceId = req.getParameter("experienceId"); String experienceId = req.getParameter("experienceId");
...@@ -364,23 +385,24 @@ public class ImportRecruitController extends BaseController { ...@@ -364,23 +385,24 @@ public class ImportRecruitController extends BaseController {
String towIndustryName = req.getParameter("towIndustryName"); String towIndustryName = req.getParameter("towIndustryName");
String towIndustryId = req.getParameter("towIndustryId"); String towIndustryId = req.getParameter("towIndustryId");
String industryCode = req.getParameter("industryCode"); String industryCode = req.getParameter("industryCode");
String regionCode = req.getParameter("regionCode");
// 1. 校验文件是否为空 // 1. 校验文件是否为空
if (file.isEmpty()) { if (file.isEmpty()) {
return BaseResult.error("001"); System.out.println("文件为空------------------------------------------------------");
} }
// 2. 校验文件类型(增强严谨性,避免OOXML错误) // 2. 校验文件类型(增强严谨性,避免OOXML错误)
String originalFilename = file.getOriginalFilename(); String originalFilename = file.getOriginalFilename();
// 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件) // 优化:双重校验(后缀+内容类型,防止改后缀的非JSON文件)
if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) { if (originalFilename == null || !originalFilename.trim().toLowerCase().endsWith(".json")) {
return BaseResult.error("002"); System.out.println("文件类型错误------------------------------------------------------");
} }
// 可选:额外校验Content-Type,进一步防止非JSON文件 // 可选:额外校验Content-Type,进一步防止非JSON文件
String contentType = file.getContentType(); String contentType = file.getContentType();
if (contentType != null && !contentType.equalsIgnoreCase("application/json") if (contentType != null && !contentType.equalsIgnoreCase("application/json")
&& !contentType.equalsIgnoreCase("text/plain")) { && !contentType.equalsIgnoreCase("text/plain")) {
return BaseResult.error("002"); System.out.println("文件类型错误------------------------------------------------------");
} }
// 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理) // 3. 读取文件全部内容(确保流仅用于文本读取,避免被Office解析库误处理)
...@@ -403,6 +425,18 @@ public class ImportRecruitController extends BaseController { ...@@ -403,6 +425,18 @@ public class ImportRecruitController extends BaseController {
Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName")); Org orgDetails = orgService.getName(jsonObject.getString("fullCompanyName"));
if(orgDetails==null){
orgDetails = new Org();
orgDetails.setName(jsonObject.getString("fullCompanyName"));
orgDetails.setDescr(jsonObject.getString("fullCompanyName")+"诚聘");
orgDetails.setRegionCode(regionCode);
orgDetails.setRegion(region);
orgDetails.setProvince(province);
orgDetails.setCity(city);
orgDetails.setId(snowFlakeFactory.nextId());
orgService.create(orgDetails);
}
if(orgDetails!=null&& StringUtils.isNotBlank(orgDetails.getId())){ if(orgDetails!=null&& StringUtils.isNotBlank(orgDetails.getId())){
Recruit recruit = new Recruit(); Recruit recruit = new Recruit();
...@@ -431,17 +465,57 @@ public class ImportRecruitController extends BaseController { ...@@ -431,17 +465,57 @@ public class ImportRecruitController extends BaseController {
recruit.setSalaryMin(jsonObject.getInteger("jobSalaryMin")); recruit.setSalaryMin(jsonObject.getInteger("jobSalaryMin"));
recruit.setOrgId(orgDetails.getId()); recruit.setOrgId(orgDetails.getId());
recruit.setOrgName(orgDetails.getName()); recruit.setOrgName(orgDetails.getName());
recruit.setBatch("1");
recruit.setBatchSend("0");
recruit.setStatus("up");
recruitService.create(recruit); recruitService.create(recruit);
try {
Thread.sleep(1000);
recruitService.sendAli(recruit);
}catch (Exception e){
e.printStackTrace();
}
} }
} }
// 4. 返回成功响应 // 4. 返回成功响应
return BaseResult.success(jsonArray); // 优化:确保返回字符串类型 System.out.println("完成导入------------------------------------------------------");
} catch (IOException e) { } catch (IOException e) {
return BaseResult.error("003"); System.out.println("io异常------------------------------------------------------");
}
});
return BaseResult.success(); // 优化:确保返回字符串类型
}
public static void main(String[] args) {
// 原始带重复元素的字符串数组
String[] originalArray = {
"https://jobs.51job.com/all/coVToBZQVjDzQEaFE2UT0.html",
"https://jobs.51job.com/all/coVToBZQVjDzQEaFE2UT0.html"
};
// 1. 将String数组转为LinkedHashSet,自动去重且保留顺序
Set<String> stringSet = new LinkedHashSet<>(Arrays.asList(originalArray));
// 若不需要顺序,可替换为HashSet(效率略高):new HashSet<>(Arrays.asList(originalArray))
// 2. 将Set转回String数组
String[] deduplicatedArray = stringSet.toArray(new String[0]);
// 打印去重结果
System.out.println("去重后的String数组:" + Arrays.toString(deduplicatedArray));
// 2. 逐个打印元素(能看到真实的字符串内容,包含引号的本质)
System.out.println("逐个打印元素:");
for (String str : deduplicatedArray) {
// 手动加引号,证明元素本身是字符串
System.out.println("\"" + str + "\",");
} }
} }
......
...@@ -69,7 +69,9 @@ public class OrgServiceImpl extends ServiceImpl<OrgMapper, Org> implements OrgSe ...@@ -69,7 +69,9 @@ public class OrgServiceImpl extends ServiceImpl<OrgMapper, Org> implements OrgSe
UserContext userContext = UserUtils.getUserContext(); UserContext userContext = UserUtils.getUserContext();
String baseCode = userContext.getBaseCode(); String baseCode = userContext.getBaseCode();
if(StringUtils.isBlank(org.getId())){
org.setId(snowFlakeFactory.nextId()); org.setId(snowFlakeFactory.nextId());
}
org.setBaseCode(baseCode); org.setBaseCode(baseCode);
org.setDel(false); org.setDel(false);
org.setCreateTime(DateUtil.now()); org.setCreateTime(DateUtil.now());
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论