Browse Source

server mode crawl

pk 6 months ago
parent
commit
1f9ab3eecb
8 changed files with 196 additions and 9 deletions
  1. 28 0
      config.server.js
  2. 3 0
      package.json
  3. 51 0
      server.js
  4. 21 5
      src/crawler.js
  5. 5 1
      src/settings.js
  6. 0 0
      tmp/done/.gitkeep
  7. 17 3
      tmp/get.sh
  8. 71 0
      yarn.lock

+ 28 - 0
config.server.js

@@ -0,0 +1,28 @@
+const EventEmitter = require('events');
+
+/**
+ * Minimal stand-in for Electron's `app` object, used when the real one is
+ * not available. Only `getPath` is provided, and only the 'userData' code
+ * is known.
+ */
+let app = {
+    /**
+     * Resolve a well-known directory by its path code.
+     * @param {string} pathCode - path code; only 'userData' is supported.
+     * @returns {string} absolute directory path for the code.
+     * @throws {Error} when the path code is not supported.
+     */
+    getPath: pathCode => {
+        switch(pathCode){
+            case 'userData':
+                return '/home/node/kisybi'
+            default:
+                // Previously fell through and returned undefined; fail loudly instead.
+                throw new Error(`Unsupported path code: ${pathCode}`)
+        }
+    }
+}
+
+// Process-local event bus shared by `ipc` (listen) and `ipb` (emit).
+let myEmitter = new EventEmitter();
+
+// Listener side: registers `listener` for `topic` on the local bus.
+let ipc = {
+    on: (topic, listener) => {
+        myEmitter.on(topic, listener)
+    }
+}
+
+/**
+ * Broadcast side: emit `ev` to every listener registered through `ipc.on`.
+ * Extra arguments are forwarded to the listeners (they were silently dropped before).
+ * NOTE(review): listeners receive exactly these arguments, with no leading event
+ * object — confirm that matches what listeners written for Electron's ipcMain expect.
+ * @param {string} ev - topic name.
+ * @param {...*} args - optional payload passed through to the listeners.
+ */
+let ipb = (ev, ...args) => {
+    myEmitter.emit(ev, ...args)
+}
+
+// Public surface of this module: the `app` path resolver, the `ipc` listener
+// registry and the `ipb` broadcaster defined above.
+module.exports = {
+    app,
+    ipc,
+    ipb
+}

+ 3 - 0
package.json

@@ -6,6 +6,7 @@
   "scripts": {
     "start": "electron .",
     "console": "node cli.js",
+    "server": "node server.js",
     "build": "electron-packager . kisybi --overwrite --asar --platform=win32 --arch=ia32 --icon=src/favicon.ico --prune=true --out=build --version-string.CompanyName=pk --version-string.FileDescription=pk --version-string.ProductName=\"KISYBI\""
   },
   "dependencies": {
@@ -25,9 +26,11 @@
     "electron-ipc-broadcast": "0.1.0",
     "moment": "2.29.4",
     "moment-timezone": "0.5.43",
+    "node-schedule": "^2.1.1",
     "openurl": "1.1.1",
     "request": "2.88.2",
     "request-promise-native": "1.0.9",
+    "shelljs": "^0.8.5",
     "sql.js": "0.5.0",
     "string-similarity": "1.2.2",
     "underscore": "1.13.6",

+ 51 - 0
server.js

@@ -0,0 +1,51 @@
+const schedule = require('node-schedule');
+const { spawn } = require('child_process');
+const { ipc } = require('./config.server.js');
+const path = require('node:path')
+
+require('./thirdparty/naturalSort.js');
+require('./src/settings.js');
+require('./src/db.js');
+require('./src/dao.js');
+require('./src/crawler.js');
+
+const app = require('./config.server.js').app
+const ipb = require('./config.server.js').ipb
+
+// Guard flag read by the scheduled job below: 'on' while crawlFnSafe is running.
+// NOTE(review): crawlFn only emits an event and returns, so this flips back to
+// 'off' right away; asynchronous work started by the listeners and the get.sh
+// run are not covered. Confirm whether the guard should span the whole cycle.
+let crawlState = 'off';
+
+// Kick off a crawl by broadcasting 'global:refresh'.
+let crawlFn = async () => {
+    ipb('global:refresh')
+}
+
+// When 'global:refresh:done' is broadcast, run tmp/get.sh and mirror its output.
+ipc.on("global:refresh:done", () => {
+    let getProcess = spawn("bash", ["get.sh"], {cwd: path.join(__dirname, 'tmp')})
+
+    getProcess.stdout.pipe(process.stdout)
+    getProcess.stderr.pipe(process.stderr)
+
+    // Without an 'error' listener a failed spawn raises an unhandled 'error'
+    // event, which would terminate this long-running scheduler process.
+    getProcess.on('error', (err) => {
+        console.error("failed to run get.sh", err)
+    })
+
+    getProcess.on('close', (code) => {
+        console.log(`child process exited with code ${code}`);
+    });
+})
+
+// Run one crawl trigger; errors are logged, not propagated, and the guard flag
+// is always reset afterwards.
+let crawlFnSafe = async () => {
+    try {
+        crawlState = 'on'
+        await crawlFn()
+    } catch (e) {
+        console.error("error on job", e)
+    } finally {
+        crawlState = 'off'
+    }
+}
+
+
+// NOTE(review): with five fields node-schedule reads this as minute 0, hour 0,
+// day-of-month 3, i.e. 00:00 on the 3rd of every month. If a daily 03:00 run
+// was intended the expression would be '0 3 * * *' — confirm.
+schedule.scheduleJob('0 0 3 * *', async () => {
+    if(crawlState === 'off'){
+        await crawlFnSafe()
+    }
+});

+ 21 - 5
src/crawler.js

@@ -4,10 +4,6 @@ let path = require('path');
 let S = require('underscore.string');
 let fs = require('fs');
 
-require('electron-ipc-broadcast').mainBroadcastListener();
-let ipb = require('electron-ipc-broadcast').default;
-let ipc = require('electron').ipcMain;
-
 let cheerio = require('cheerio');
 let request = require('request-promise-native');
 let settings = require('./settings.js');
@@ -45,6 +41,25 @@ let defaults = {
     user: 0,
 };
 
+
+// Event plumbing: Electron's IPC when it can be loaded, otherwise the
+// process-local replacement exported by config.server.js.
+let ipb;
+let ipc;
+
+try {
+    require('electron-ipc-broadcast').mainBroadcastListener();
+    ipb = require('electron-ipc-broadcast').default;
+    ipc = require('electron').ipcMain;
+} catch (e) {
+    console.error("ipc not available")
+}
+
+// Fall back as a pair: if only one of the two were replaced, broadcasts and
+// listeners could end up on different buses and never meet.
+if(!ipc || !ipb){
+    const serverIpc = require('../config.server.js')
+    ipc = serverIpc.ipc
+    ipb = serverIpc.ipb
+}
+
 function doGrouping(rs, rj) {
     let halfString = function (x) {
         return x.substring(0, x.length / 2)
@@ -157,7 +172,7 @@ function doCrawl(cat, force) {
 
         var lastDates = settings().get('crawl').lastDates;
         if (_.isUndefined(lastDates)) lastDates = {};
-        var lastDate = _.has(lastDates, cat) ? lastDates[cat] : '2019-01-01';
+        var lastDate = _.has(lastDates, cat) ? lastDates[cat] : '2024-09-01';
 
         let date = moment(lastDate, 'YYYY-MM-DD').endOf('day');
         if (date.isBefore(moment())) date.add(1, 'day');
@@ -250,6 +265,7 @@ function doAllCrawl() {
                 getCat(idx + 1);
             else {
                 console.log('all cats');
+                ipb("global:refresh:done")
                 /*new Promise(doGrouping).then(() => {
                     console.log("all groups");
                     crawling = false;

+ 5 - 1
src/settings.js

@@ -1,5 +1,9 @@
 let electron = require('electron');
 let app = electron.app;
+// cli mode: `electron.app` is falsy here, so use the replacement from config.server.js
+app = app || require('../config.server.js').app
+
 let path = require('path');
 let fs = require('fs');
 let _ = require('underscore');
@@ -24,7 +28,7 @@ function initConfig() {
     console.log(filepath);
     if (!fs.existsSync(filepath)) {
         try {
-            fs.mkdirSync(dir);
+            fs.mkdirSync(dir, {recursive: true});
             fs.closeSync(fs.openSync(filepath, 'w'));
         } catch (e) {
         }

+ 0 - 0
tmp/done/.gitkeep


+ 17 - 3
tmp/get.sh

@@ -1,14 +1,28 @@
-export http_proxy=http://roof:8117/
-export https_proxy=http://roof:8117/
-export no_proxy=roof
+export http_proxy=http://192.168.0.11:8117/
+export https_proxy=http://192.168.0.11:8117/
+export no_proxy=192.168.0.11
+
+# Output directories for finished downloads.
+mkdir -p done/_m
+mkdir -p done/_b
+# Working directories. -p because this script never removes m/ and b/, so a
+# plain mkdir would fail on every run after the first.
+mkdir -p m
+mkdir -p b
+
+# --- M: gather the ml_* list files into m/, merge them into one list, download ---
+# NOTE(review): `cd m` is not checked; if it fails, the rm/find below run in
+# the starting directory instead.
+echo "get all M"
 mv ml_* m
 cd m
 for f in ml_*; do cat $f; echo; done > ml
 rm -f ml_* 
 wget2 -c -i ml
+# Drop the merged list, then move every remaining regular file in this
+# directory to ../done/_m with ".torrent" appended to its name.
+rm -f ml
+find . -maxdepth 1 -type f -exec sh -c 'mv "$0" "../done/_m/$0.torrent"' {} \;
+
 cd ..
+
+# --- B: same steps for the bl_* list files, using b/ and ../done/_b ---
+# NOTE(review): `cd b` is not checked either; same caveat as above.
+echo "get all B"
 mv bl_* b
 cd b
 for f in bl_*; do cat $f; echo; done > bl
 rm -f bl_* 
 wget2 -c -i bl
+rm -f bl
+find . -maxdepth 1 -type f -exec sh -c 'mv "$0" "../done/_b/$0.torrent"' {} \;

+ 71 - 0
yarn.lock

@@ -416,6 +416,13 @@ core-util-is@~1.0.0:
   resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.3.tgz#a6042d3634c2b27e9328f837b965fac83808db85"
   integrity sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==
 
+cron-parser@^4.2.0:
+  version "4.9.0"
+  resolved "https://registry.yarnpkg.com/cron-parser/-/cron-parser-4.9.0.tgz#0340694af3e46a0894978c6f52a6dbb5c0f11ad5"
+  integrity sha512-p0SaNjrHOnQeR8/VnfGbmg9te2kfyYSQ7Sc/j/6DtPL3JQvKxmjO9TSjNFpujqV3vEYYBvNNvXSxzyksBWAx1Q==
+  dependencies:
+    luxon "^3.2.1"
+
 cross-spawn-windows-exe@^1.1.0, cross-spawn-windows-exe@^1.2.0:
   version "1.2.0"
   resolved "https://registry.yarnpkg.com/cross-spawn-windows-exe/-/cross-spawn-windows-exe-1.2.0.tgz#46253b0f497676e766faf4a7061004618b5ac5ec"
@@ -851,6 +858,18 @@ getpass@^0.1.1:
   dependencies:
     assert-plus "^1.0.0"
 
+glob@^7.0.0:
+  version "7.2.3"
+  resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b"
+  integrity sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==
+  dependencies:
+    fs.realpath "^1.0.0"
+    inflight "^1.0.4"
+    inherits "2"
+    minimatch "^3.1.1"
+    once "^1.3.0"
+    path-is-absolute "^1.0.0"
+
 glob@^7.1.6:
   version "7.2.0"
   resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.0.tgz#d15535af7732e02e948f4c41628bd910293f6023"
@@ -981,6 +1000,11 @@ ini@^1.3.4:
   resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c"
   integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==
 
+interpret@^1.0.0:
+  version "1.4.0"
+  resolved "https://registry.yarnpkg.com/interpret/-/interpret-1.4.0.tgz#665ab8bc4da27a774a40584e812e3e0fa45b1a1e"
+  integrity sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA==
+
 is-arrayish@^0.2.1:
   version "0.2.1"
   resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d"
@@ -1148,6 +1172,11 @@ lodash@^4.17.10, lodash@^4.17.19:
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
   integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
 
+long-timeout@0.1.1:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/long-timeout/-/long-timeout-0.1.1.tgz#9721d788b47e0bcb5a24c2e2bee1a0da55dab514"
+  integrity sha512-BFRuQUqc7x2NWxfJBCyUrN8iYUYznzL9JROmRz1gZ6KlOIgmoD+njPVbb+VNn2nGMKggMsK79iUNErillsrx7w==
+
 lowercase-keys@^1.0.0, lowercase-keys@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/lowercase-keys/-/lowercase-keys-1.0.1.tgz#6f9e30b47084d971a7c820ff15a6c5167b74c26f"
@@ -1165,6 +1194,11 @@ lru-cache@^6.0.0:
   dependencies:
     yallist "^4.0.0"
 
+luxon@^3.2.1:
+  version "3.5.0"
+  resolved "https://registry.yarnpkg.com/luxon/-/luxon-3.5.0.tgz#6b6f65c5cd1d61d1fd19dbf07ee87a50bf4b8e20"
+  integrity sha512-rh+Zjr6DNfUYR3bPwJEnuwDdqMbxZW7LOQfUN4B54+Cl+0o5zaU9RJ6bcidfDtC1cWCZXQ+nvX8bf6bAji37QQ==
+
 matcher@^3.0.0:
   version "3.0.0"
   resolved "https://registry.yarnpkg.com/matcher/-/matcher-3.0.0.tgz#bd9060f4c5b70aa8041ccc6f80368760994f30ca"
@@ -1196,6 +1230,13 @@ minimatch@^3.0.4:
   dependencies:
     brace-expansion "^1.1.7"
 
+minimatch@^3.1.1:
+  version "3.1.2"
+  resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b"
+  integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==
+  dependencies:
+    brace-expansion "^1.1.7"
+
 minimist@^1.2.0, minimist@^1.2.5:
   version "1.2.5"
   resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
@@ -1240,6 +1281,15 @@ ms@^2.1.1:
   resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
   integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==
 
+node-schedule@^2.1.1:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/node-schedule/-/node-schedule-2.1.1.tgz#6958b2c5af8834954f69bb0a7a97c62b97185de3"
+  integrity sha512-OXdegQq03OmXEjt2hZP33W2YPs/E5BcFQks46+G2gAxs4gHOIVD1u7EqlYLYSKsaIpyKCK9Gbk0ta1/gjRSMRQ==
+  dependencies:
+    cron-parser "^4.2.0"
+    long-timeout "0.1.1"
+    sorted-array-functions "^1.3.0"
+
 normalize-package-data@^2.3.2:
   version "2.5.0"
   resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8"
@@ -1488,6 +1538,13 @@ readable-stream@^2.2.2:
     string_decoder "~1.1.1"
     util-deprecate "~1.0.1"
 
+rechoir@^0.6.2:
+  version "0.6.2"
+  resolved "https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384"
+  integrity sha512-HFM8rkZ+i3zrV+4LQjwQ0W+ez98pApMGM3HUrN04j3CqzPOzl9nmP15Y8YXNm8QHGv/eacOVEjqhmWpkRV0NAw==
+  dependencies:
+    resolve "^1.1.6"
+
 request-promise-core@1.1.4:
   version "1.1.4"
   resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.4.tgz#3eedd4223208d419867b78ce815167d10593a22f"
@@ -1613,6 +1670,20 @@ shebang-regex@^3.0.0:
   resolved "https://registry.yarnpkg.com/shebang-regex/-/shebang-regex-3.0.0.tgz#ae16f1644d873ecad843b0307b143362d4c42172"
   integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==
 
+shelljs@^0.8.5:
+  version "0.8.5"
+  resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.5.tgz#de055408d8361bed66c669d2f000538ced8ee20c"
+  integrity sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow==
+  dependencies:
+    glob "^7.0.0"
+    interpret "^1.0.0"
+    rechoir "^0.6.2"
+
+sorted-array-functions@^1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/sorted-array-functions/-/sorted-array-functions-1.3.0.tgz#8605695563294dffb2c9796d602bd8459f7a0dd5"
+  integrity sha512-2sqgzeFlid6N4Z2fUQ1cvFmTOLRi/sEDzSQ0OKYchqgoPmQBVyM3959qYx3fpS6Esef80KjmpgPeEr028dP3OA==
+
 spdx-correct@^3.0.0:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/spdx-correct/-/spdx-correct-3.1.1.tgz#dece81ac9c1e6713e5f7d1b6f17d468fa53d89a9"