# Reconstructed post-image of hunk "@@ -113,31 +113,64 @@" of
# AutoBuild/nrdlist.py, taken from commit
# e105961304c27f43167f3402cdb2b6a1f01a4772 (Sat, 13 Apr 2024).
# Commit subject, translated: "do the sort inside the dict to speed up the
# whole process".
#
# NOTE(review): asyncio, os, pathlib, arrow, httpx, logger, Dict, List and
# Phase1..Phase3 are used here but not defined in this hunk; they are expected
# to come from the part of nrdlist.py above line 113 -- confirm.


class Phase4:
    """Download the list published at ``$PHASE4_URL`` and keep it by date."""

    def __init__(self):
        """Read the source URL from the environment.

        Raises:
            ValueError: if the ``PHASE4_URL`` environment variable is unset
                or empty.
        """
        self.base_url = os.getenv("PHASE4_URL")
        if not self.base_url:
            raise ValueError("PHASE4_URL not set")
        # Maps a "YYYY-MM-DD" date string to the lines downloaded for it.
        self.data: Dict[str, List[str]] = {}

    async def fetch(self):
        """Download ``self.base_url`` and store its lines in ``self.data``.

        The lines are stored under the UTC date seven days before now.

        Returns:
            bool: ``True`` when the download succeeded and ``self.data`` was
            updated, ``False`` when the server answered with a status other
            than 200 (``self.data`` is left untouched in that case).
        """
        now = arrow.utcnow()
        async with httpx.AsyncClient() as client:
            logger.info("Downloading: %s", self.base_url)
            r = await client.get(self.base_url)
            if r.status_code != 200:
                logger.error("Download failed: %s", self.base_url)
                return False
            date = now.shift(days=-7).date().strftime("%Y-%m-%d")
            # The first two and last two lines of the payload are dropped --
            # presumably a header/footer of the feed; verify against the
            # actual PHASE4_URL response.
            self.data[date] = r.text.splitlines()[2:-2]
            return True

    async def run(self):
        """Entry point used by the script: perform the single download."""
        await self.fetch()


async def write_files(data: List[dict]):
    """Merge per-phase results and write cumulative ``nrd/past-NNday.txt`` files.

    Args:
        data: one mapping per download phase, each mapping a date string
            (``"YYYY-MM-DD"``) to an iterable of entries for that date.

    For the 30 most recent dates (newest first), file ``past-01day.txt``
    holds the newest date's entries, ``past-02day.txt`` the two newest dates'
    entries, and so on. Every entry appears once per file; entries new on a
    given date are appended in sorted order, so output is deterministic
    without re-sorting the whole accumulated list on every iteration.
    """
    base_path = pathlib.Path("nrd")
    base_path.mkdir(exist_ok=True)

    # Use the argument rather than a module-level phase object, so every
    # phase handed in by the caller contributes to the output.
    combined_data: Dict[str, set] = {}
    for phase_data in data:
        for key, value in phase_data.items():
            combined_data.setdefault(key, set()).update(value)

    # ISO dates sort lexicographically, so reverse order is newest first.
    sort_date = sorted(combined_data, reverse=True)[:30]

    seen: set = set()
    lines: List[str] = []
    for day, date in enumerate(sort_date, start=1):
        fresh = combined_data[date] - seen
        seen |= fresh
        # Keeping a list of lines (instead of concatenating joined strings)
        # guarantees a newline between the last entry of one date and the
        # first entry of the next.
        lines.extend(sorted(fresh))
        base_path.joinpath(f"past-{day:02d}day.txt").write_bytes(
            "\n".join(lines).encode()
        )


if __name__ == "__main__":
    import time

    start = time.perf_counter()
    # Create the loop explicitly: asyncio.get_event_loop() without a running
    # loop is deprecated in current Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        ph1 = Phase1()
        ph2 = Phase2()
        ph3 = Phase3()
        ph4 = Phase4()

        async def _download_all():
            # Run all phases concurrently; Phase1.run takes the loop itself.
            await asyncio.gather(ph1.run(loop), ph2.run(), ph3.run(), ph4.run())

        loop.run_until_complete(_download_all())
        logger.info("Download Complete, Now writing")
        loop.run_until_complete(
            write_files([ph1.data, ph2.data, ph3.data, ph4.data])
        )
    finally:
        loop.close()
    end = time.perf_counter() - start
    logger.info("Time taken: %.2f seconds", end)