first commit

This commit is contained in:
spiduler
2021-07-13 17:11:27 +09:00
commit d36fd3864c
17 changed files with 5217 additions and 0 deletions

4749
a.html Normal file

File diff suppressed because it is too large Load Diff

4
anything.js Normal file
View File

@@ -0,0 +1,4 @@
{
  // All image URLs on the page: the `src` of every <img>, keeping only values
  // longer than 10 characters.
  // NOTE(review): assumes `$` is jQuery/cheerio-compatible, where .map() and
  // .filter() both call their callback as (index, value) — confirm.
  images: $('img')
    .map((k, img) => $(img).attr('src'))
    // The value is the SECOND callback argument. The first one is the numeric
    // index, which has no `length`, so testing it rejected every entry.
    .filter((k, src) => src.length > 10)
}

1
diagram/Diagram.gliffy Normal file

File diff suppressed because one or more lines are too long

BIN
diagram/Diagram.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

1
docker/.env Normal file
View File

@@ -0,0 +1 @@
# Bind mount in "host path:container path" form. The docker-compose files in
# this commit reference it as "${SOURCE_VOLUME}" in their `volumes:` lists.
# NOTE(review): the container path here is /var/www/crawling, while the
# spiduler services are launched from /var/www/spiduler/... — confirm which
# compose file this value is meant for.
SOURCE_VOLUME=../../:/var/www/crawling

View File

@@ -0,0 +1,173 @@
# Development stack for the spiduler crawler: an nginx front end, a Tor proxy,
# a Selenium Chrome node and one Node container per spd-app-* service.
#
# The top-level networks below use the `name` key, which was introduced in
# Compose file format 3.5; tools that validate against the declared version
# reject it under "3.1", hence the version declared here.
version: "3.5"
services:
  spiduler-nginx:
    image: nginx:alpine
    container_name: spiduler-nginx
    ports:
      - "32100:80"
      # NOTE(review): publishes container port 449 on a random host port;
      # confirm that 449 (rather than 443) is intended.
      - "449"
    volumes:
      - ./nginx/spiduler.conf:/etc/nginx/conf.d/default.conf
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-gateway
  spiduler-tor:
    image: dperson/torproxy
    container_name: spiduler-tor
    hostname: spiduler-tor
    privileged: true
    shm_size: 2g
    restart: unless-stopped
    env_file: ./.env
  spiduler-chrome:
    image: selenium/standalone-chrome
    container_name: spiduler-chrome
    hostname: spiduler-chrome
    privileged: true
    shm_size: 2g
    restart: unless-stopped
    env_file: ./.env
    environment:
      # - HUB_HOST=spiduler-chrome
      # - HUB_PORT=4444
      - JAVA_OPTS=-Dwebdriver.chrome.whitelistedIps=
      # - SE_OPTS=-debug
  # Node apps
  # Every app mounts the source tree through ${SOURCE_VOLUME} (defined in
  # ./.env) and is started with the same --redis/--mongo/--gateway/--quix24
  # endpoints; only the spd-app-* directory differs.
  spiduler-gateway:
    image: node:lts-alpine
    container_name: spiduler-gateway
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-gateway --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    networks:
      - ibpcorp
      - default
  spiduler-browser:
    image: node:lts-alpine
    container_name: spiduler-browser
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-browser --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-nginx
      - spiduler-chrome
  spiduler-extractor:
    image: node:lts-alpine
    container_name: spiduler-extractor
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-extractor --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-browser
  spiduler-manager:
    image: node:lts-alpine
    container_name: spiduler-manager
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-manager --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-browser
  spiduler-image:
    image: node:lts-alpine
    container_name: spiduler-image
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-image --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-nginx
    networks:
      - ibpcorp
      - default
  spiduler-godo:
    image: node:lts-alpine
    container_name: spiduler-godo
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-godo --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-nginx
  spiduler-updater:
    image: node:lts-alpine
    container_name: spiduler-updater
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-updater --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-nginx
  # spiduler-cache:
  #   image: node:lts-alpine
  #   container_name: spiduler-cache
  #   volumes:
  #     - "${SOURCE_VOLUME}"
  #   entrypoint: node /var/www/spiduler/spd-app-cache --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
  #   restart: unless-stopped
  #   env_file: ./.env
  #   depends_on:
  #     - spiduler-nginx
  spiduler-external:
    image: node:lts-alpine
    container_name: spiduler-external
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-external --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-nginx
    networks:
      - ibpcorp
      - default
  # NOTE(review): the service key is spelled "transltor" while the container
  # name and app directory say "translator". Left unchanged because the key is
  # what `docker-compose <cmd> <service>` callers use — confirm and rename.
  spiduler-transltor:
    image: node:lts-alpine
    container_name: spiduler-translator
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/spiduler/spd-app-translator --env development --redis ibpcorp.c-xtra.com:49023 --mongo ibpcorp.c-xtra.com:49024 --gateway spiduler-nginx:80 --quix24 ibpcorp-nginx:31000
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - spiduler-nginx
networks:
  # Project network, created by Compose under the fixed name "spiduler".
  default:
    name: spiduler
  # Pre-existing network; must be created outside this file.
  ibpcorp:
    external: true
    name: ibpcorp

View File

@@ -0,0 +1,37 @@
# Stack for the crawling apps: an nginx front end, the cro gateway and the
# andygrace crawler.
#
# The default network below uses the `name` key, which was introduced in
# Compose file format 3.5; tools that validate against the declared version
# reject it under "3.1", hence the version declared here.
version: "3.5"
services:
  nginx:
    image: nginx:alpine
    container_name: nginx
    ports:
      - "33100:80"
      # NOTE(review): publishes container port 449 on a random host port;
      # confirm that 449 (rather than 443) is intended.
      - "449"
    volumes:
      - ./nginx/crawling.conf:/etc/nginx/conf.d/default.conf
    restart: unless-stopped
    env_file: ./.env
    depends_on:
      - gateway
  gateway:
    image: node:lts-alpine
    container_name: cro-gateway
    volumes:
      # Source tree bind mount, defined in ./.env.
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/crawling/cro-app-gateway
    restart: unless-stopped
    env_file: ./.env
  andygrace:
    image: node:lts-alpine
    container_name: crs-andygrace
    volumes:
      - "${SOURCE_VOLUME}"
    entrypoint: node /var/www/crawling/crs-app-andygrace
    restart: unless-stopped
    env_file: ./.env
networks:
  # Pre-existing network named "crawling"; must be created outside this file.
  default:
    name: crawling
    external: true

View File

10
docker/legacy/Dockerfile Normal file
View File

@@ -0,0 +1,10 @@
# Node LTS (Alpine) image whose entrypoint is the bundled start.sh launcher.
FROM node:lts-alpine
LABEL author=w3c@naver.com

# Ship the launcher and make it executable.
COPY start.sh /var/www/start.sh
RUN chmod +x /var/www/start.sh

# Application directory, declared as a volume.
VOLUME /var/www/app

ENTRYPOINT ["/var/www/start.sh"]

3
docker/legacy/start.sh Normal file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Run the app found at /var/www/app in development mode with its
# Redis / Mongo / gateway / quix24 endpoints.
node /var/www/app \
  --env development \
  --redis solutions-ibpcorp.de:49023 \
  --mongo solutions-ibpcorp.de:49024 \
  --gateway spiduler-nginx:80 \
  --quix24 solutions-ibpcorp.de:31000

# Reached only once node has exited: hand control to whatever command was
# passed as arguments to this script, if any.
exec "$@"

View File

@@ -0,0 +1,28 @@
# You may add here your
# server {
# ...
# }
# statements for each of your virtual hosts to this file
##
# You should look at the following URLs in order to gain a solid understanding
# of Nginx configuration files and fully unleash the power of Nginx.
# http://wiki.nginx.org/Pitfalls
# http://wiki.nginx.org/QuickStart
# http://wiki.nginx.org/Configuration
#
# Generally, you will want to move this file somewhere, and start with a clean
# file but keep this around for reference. Or just disable in sites-enabled.
#
# Please see /usr/share/doc/nginx-doc/examples/ for more detailed examples.
##
server {
    # Catch-all virtual host on port 80.
    listen      80;
    server_name _;

    # Proxy every request to cro-gateway:33100, passing along the original
    # Host header and the client address.
    location / {
        proxy_pass       http://cro-gateway:33100;
        proxy_set_header Host            $http_host;
        proxy_set_header X-Forwarded-For $remote_addr;
    }
}

View File

@@ -0,0 +1,7 @@
# Selenium standalone Chrome image extended with curl, nginx and Node.js 12.x.
FROM selenium/standalone-chrome
LABEL authors=w3c@naver.com

# NOTE(review): if the base image defaults to a non-root user, the apt-get
# calls below need a preceding `USER root` (and a switch back afterwards) —
# confirm against the base image tag in use.

# The package index is refreshed in the same layer that installs from it, so a
# cached `apt-get update` layer can never feed a stale index to a later
# install. `curl -f` makes an HTTP error fail the build instead of saving an
# error page that bash would then try to execute. The apt lists are dropped at
# the end to keep the layer small.
RUN apt-get update \
    && apt-get install -y curl nginx \
    && curl -fsSL https://deb.nodesource.com/setup_12.x -o nodesource_setup.sh \
    && bash nodesource_setup.sh \
    && apt-get install -y nodejs \
    && rm -rf /var/lib/apt/lists/*

22
github/config.sh Normal file
View File

@@ -0,0 +1,22 @@
# Set the commit identity (user.name / user.email) in every spd-app-* checkout
# located in the current directory.
for app in gateway browser extractor manager cache updater external image godo translator devopts; do
  repo="spd-app-$app"
  git -C "$repo" config user.name spiduler
  git -C "$repo" config user.email spiderw3c@gmail.com
done

11
github/pull.sh Normal file
View File

@@ -0,0 +1,11 @@
# Pull origin/master into every spd-app-* checkout located in the current
# directory, one repository after another.
for app in gateway browser extractor manager cache updater external image godo translator devopts; do
  git -C "spd-app-$app" pull origin master
done

17
mongoQuery.msh Normal file
View File

@@ -0,0 +1,17 @@
// Ad-hoc queries against the Catalog collection.
//
// NOTE(review): two credential-looking tokens were stored as comments in this
// file; they are removed here. Rotate them and keep secrets out of the
// repository.
//
// Pipelines are passed as a single array, the documented form of aggregate(),
// which also leaves the second argument free for options.

// count duplicates: number of (ean, site) pairs that occur more than once
db.Catalog.aggregate([
  { $group: { _id: { ean: '$ean', site: '$site' }, count: { $sum: 1 } } },
  { $match: { count: { $gt: 1 } } },
  { $count: 'ean' }
])

// count ean for site: number of distinct (ean, site) pairs
db.Catalog.aggregate([
  { $group: { _id: { ean: '$ean', site: '$site' } } },
  { $count: 'ean' }
])

// image empty: number of documents whose site_main_img is null or missing
db.Catalog.aggregate([
  { $match: { site_main_img: null } },
  { $count: 'ean' }
])

// rename field: Ean -> ean, Site -> site, Uri -> url on every document
db.Catalog.updateMany({}, { $rename: { Ean: 'ean', Site: 'site', Uri: 'url' } })

// one sample document that has a main_img
db.Catalog.find({ main_img: { $ne: null } }).limit(1)

141
restart.sh Normal file
View File

@@ -0,0 +1,141 @@
#!/bin/bash
# npmInstall="n"
# while getopts ":e:n:r:q:" opt; do
# case $opt in
# e) env="$OPTARG"
# ;;
# n) npmInstall="$OPTARG"
# ;;
# r) redis="$OPTARG"
# ;;
# q) quix24Gateway="$OPTARG"
# ;;
# \?) echo "Invalid option -$OPTARG" >&2
# ;;
# esac
# done
# cd /var/www/spiduler
# if [ $npmInstall == "y" ]
# then
# npm --prefix /var/www/spiduler/spd-app-gateway install
# npm --prefix /var/www/spiduler/spd-app-browser install
# npm --prefix /var/www/spiduler/spd-app-extractor install
# npm --prefix /var/www/spiduler/spd-app-spiduler install
# fi
# forever stopall
# forever cleanlogs
# forever --uid SPD-gateway start spd-app-gateway --env $env --redis $redis
# forever --uid SPD-browser start --killSignal=SIGTERM spd-app-browser --env $env
# forever --uid SPD-extractor start spd-app-extractor --env $env --redis $redis
# forever --uid SPD-spiduler start spd-app-spiduler --env $env --redis $redis --quix24 $quix24Gateway
# Empty the JSON log file of every Docker container on this host.
sudo sh -c "truncate -s 0 /var/lib/docker/containers/*/*-json.log"

# Host directory holding the spd-app-* checkouts.
src=/var/www/cafemaster/endpoint/spiduler

# Start one Node app container: spiduler-<app>, attached to the "spiduler"
# network, with its checkout mounted at /var/www/app.
run_app() {
  docker run -d --net spiduler --net-alias "spiduler-$1" --name "spiduler-$1" \
    -v "$src/spd-app-$1:/var/www/app" ibpcorp/node:lts-alpine
}

# Tor proxy and Selenium Chrome, both with the whole tree mounted.
docker run -d --net spiduler --net-alias spiduler-tor --name spiduler-tor \
  -v "$src:/var/www/spiduler" dperson/torproxy
docker run -d --net spiduler --net-alias spiduler-chrome --name spiduler-chrome \
  -v "$src:/var/www/spiduler" selenium/standalone-chrome

# Gateway first, then the nginx front end, then the remaining apps.
run_app gateway
docker run -d --net spiduler --net-alias spiduler-nginx --name spiduler-nginx \
  -p 32100:80 -p 449 \
  -v "$src/spd-app-manager/docker/nginx/spiduler.conf:/etc/nginx/conf.d/default.conf" \
  nginx:alpine
for app in browser extractor manager cache updater external image godo; do
  run_app "$app"
done

# Original note: the Redis queue containers are not connected to the spiduler
# apps, so the updater is attached to one more network.
# NOTE(review): 3aa343ce4a74 is a host-specific network ID — confirm.
docker network connect 3aa343ce4a74 spiduler-updater
# Pull origin/master into each checkout.
# NOTE(review): spd-app-translator gets an npm install below but is not pulled
# here — confirm whether that omission is intentional.
for app in gateway browser extractor manager cache updater external image godo; do
  git -C "spd-app-$app" pull origin master
done

# Install npm dependencies for each app.
for app in gateway browser extractor manager cache updater external translator image godo; do
  npm --prefix "spd-app-$app" i
done
# Clone every spd-app-* repository of the spiduler GitHub account.
#
# No password is embedded in the URLs: git obtains it from the configured
# credential helper, or prompts for it. A password written into a clone URL
# ends up in shell history, in each clone's .git/config and in this
# repository's history.
# NOTE(review): the password that was committed here in plain text must be
# rotated.
for app in gateway browser extractor manager cache updater external image godo translator; do
  git clone "https://spiduler@github.com/spiduler/spd-app-$app.git"
done
# --- Operational cheat-sheet --------------------------------------------------
# NOTE(review): the lines below read as a collection of one-off commands rather
# than a sequence meant to run top to bottom (they restart, stop and remove the
# same containers, and some lines are not commands at all) — confirm before
# executing this file as a script.

# Container name list kept for copy/paste; as a command this line would fail.
spiduler-tor spiduler-chrome spiduler-gateway spiduler-nginx spiduler-browser spiduler-manager spiduler-extractor spiduler-cache spiduler-updater spiduler-external
# Restart every container whose name matches spiduler-*.
docker restart $(docker ps -a --filter "name=spiduler-*" --format "{{.Names}}")
# Same, with the containers listed explicitly.
docker restart spiduler-tor spiduler-chrome spiduler-gateway spiduler-nginx spiduler-browser spiduler-manager spiduler-extractor spiduler-cache spiduler-updater spiduler-external
# Stop the explicitly listed containers.
docker stop spiduler-tor spiduler-chrome spiduler-gateway spiduler-nginx spiduler-browser spiduler-manager spiduler-extractor spiduler-cache spiduler-updater spiduler-external
# The spiduler-browser container should be ready before the spider starts, so restart again
# Follow the last 10 log lines of a container; the name is an unfinished
# prefix that has to be completed by hand.
docker logs -f --tail 10 spiduler-
# Truncate the log file (LogPath) of every container matching spiduler-*.
docker ps -a -q --filter "name=spiduler-*" | xargs docker inspect --format='{{.LogPath}}' | xargs sudo truncate -s0
# Print (number of lines from `ps -C chrome`, minus one) divided by 6.
# NOTE(review): presumably an estimate of open browser instances at about six
# chrome processes each — confirm.
ps -C chrome | wc -l | xargs -n 1 | awk '{print ($1-1)/6}'
# Table of name and status for every container matching spiduler-*.
docker ps -a --filter "name=spiduler-*" --format "table {{.Names}}\t{{.Status}}"
# cd spd-app-spiduler/docker
# docker-compose -f docker-compose-dev.yml up
# Remove every container created from the dperson/torproxy image.
docker rm $(docker ps -a -q --filter "ancestor=dperson/torproxy")
# NOTE(review): the next two lines are opaque base64 strings, not commands;
# their purpose is not evident from this file. If they are secrets, rotate
# them and remove them from the repository.
JUFDJUIzJTAyJTVEJTk5JTE5JUYwRw==
JTVFJTE0JUZCJUVGJUQ2SSUxNiU4RCVCMyVGOSU3RSVGRCVGNSVGNyUxNSU3RCVBM0YlRDAlMkNwJUNDJUQ2JUNDVSU5MCUzRWklN0YlOEIlQUQlRTMlQURqYSVCQiUwOSUwMCVDMCUwOCVFRiVGQyVDQk0=
## ibpcorp
# Services launched through forever with php as the interpreter.
for app in crypto excel delivery table; do
  forever start -c php "ibp-app-$app/program.php" --env production
done
# NOTE(review): unlike its siblings, this one passes a bare `production`
# without `--env` — confirm that this is intended.
forever start -c php ibp-app-member/program.php production
forever start -c php ibp-app-auth/program.php --env production
forever start -c php ibp-app-query/program.php --env production --redis 127.0.0.1:6379 --mongo 127.0.0.1:27001 --gateway 127.0.0.1:31000

# Services launched with forever's default interpreter.
forever start ibp-app-gateway
forever start ibp-app-prebooking
for app in scan inventory; do
  forever start "ibp-app-$app" --env production
done
forever start ibp-app-taxation --env production --redis 127.0.0.1:6379 --mongo 127.0.0.1:27001 --gateway 127.0.0.1:31000
forever start ibp-app-cache --env production --redis 127.0.0.1:49023 --mongo 127.0.0.1:27001 --gateway 127.0.0.1:31000
for app in tracking catalog parcel; do
  forever start "ibp-app-$app" --env production --redis 127.0.0.1:49023 --mongo 127.0.0.1:49024 --gateway 127.0.0.1:31000
done

13
test.js Normal file
View File

@@ -0,0 +1,13 @@
// Scratch checks of a few Node behaviours; everything is printed to stdout.
const url = require('url')

// Legacy url.parse() applied to a relative URL with a query string.
const parsed = url.parse('/abc/asdf.php?a=1&b=2')
console.log(parsed)

// Keep only the truthy entries of an array.
const flags = [true, false, null]
console.log(flags.filter(Boolean))

// An Error carrying an extra ad-hoc property, logged after a label.
const err = new Error('abc')
err.k = { a: 1, b: 3 }
console.log('sdfdsf', err)