first commit

This commit is contained in:
spiduler
2021-07-13 17:15:12 +09:00
commit 2cdd199c48
9 changed files with 340 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
node_modules/
package-lock.json
.env
real.html

67
config/index.js Normal file
View File

@@ -0,0 +1,67 @@
const commandLineArgs = require('command-line-args')
, logger = require("logops");
/**
 * Command line configuration loader.
 *
 * Constructing an instance parses the process arguments, checks that every
 * required option is present and then configures the shared `logops` logger.
 * When parsing fails or a required option is missing the process is
 * terminated with exit code 1.
 */
class Config {
  constructor() {
    /** Parsed command line options (filled in by `init`). */
    this.options = {}
    /** Options that must hold a truthy value; after `validate` it lists the missing ones. */
    this.requires = ['env', 'gateway', 'quix24']
    this.init()
    this.validate()
    this.finalize()
  }

  /**
   * Parse the command line into `this.options`.
   * Terminates the process when the arguments cannot be interpreted.
   */
  init() {
    try {
      this.options = commandLineArgs([
        // `env` is a single-valued option, so its default is a plain string
        // (an array default would leak an array into `options.env`).
        { name: 'env', alias: 'e', type: String, defaultValue: 'production' },
        { name: 'host', type: String },
        { name: 'port', type: Number },
        { name: 'database', alias: 'd', type: String },
        { name: 'username', alias: 'u', type: String },
        { name: 'password', alias: 'p', type: String },
        { name: 'mongo', alias: 'm', type: String },
        { name: 'redis', alias: 'r', type: String },
        { name: 'level', alias: 'l', type: String },
        { name: 'gateway', alias: 'g', type: String },
        { name: 'quix24', alias: 'q', type: String }
      ]);
    } catch (e) {
      // Logged at error level: the log level has not been configured yet at
      // this point, so a debug message would not be guaranteed to be shown.
      logger.error('Command line arguments interpret failed :', e.message)
      logger.error('expected arguments : ', JSON.stringify(this.requires))
      process.exit(1)
    }
  }

  /**
   * Apply the log level (explicit `level` option, else DEBUG for the
   * development environment and WARN otherwise) and trim the logger output.
   */
  finalize() {
    const isDevelopment = this.options.env === 'development'
    if (isDevelopment) {
      logger.formatters.dev.omit = ['pid', 'port', 'hostname', 'app'];
    }
    this.options.level = this.options.level || (isDevelopment ? 'DEBUG' : 'WARN')
    logger.formatters.json.omit = ['pid', 'port', 'hostname', 'app'];
    logger.setLevel(this.options.level)
    logger.debug(this.options, 'Environment setting options')
  }

  /**
   * Reduce `this.requires` to the required options that are still missing
   * and terminate the process when any remain.
   */
  validate() {
    this.requires = this.requires.filter((name) => !this.options[name])
    if (this.requires.length) {
      // Error level for the same reason as in `init`: this runs before the
      // log level is set, and the operator must see why the process exits.
      logger.error('Process terminated invalid required arguments: ', JSON.stringify(this.requires))
      process.exit(1)
    }
  }
}
// The command line is parsed and validated once, when this module is first
// loaded; the Config constructor may terminate the process (exit code 1)
// when the arguments are invalid or a required option is missing.
let config = new Config;
// Only the parsed options object is exported, not the Config instance.
module.exports = config.options

65
config/website/Real.js Normal file
View File

@@ -0,0 +1,65 @@
/**
 * Scraping rules for product pages: the CSS selectors to read and the logic
 * that turns a loaded cheerio document into a flat product object.
 */
class Real {
  constructor() {
    this.selectors = {
      title: '.rd-title',
      price: '.rd-buybox__price',
      main_image: '.rd-gallery__container--thumbnails .swiper-slide',
      info_box: 'div.rd-product-description__top-accordion-content > div.rd-product-description__top-accordion-content-attributes .rd-attribute-table > div',
      // Looked up relative to each info_box row.
      info_title: '> span',
      info_data: 'div > div',
      description: '#rd-product-description',
      // Selectors sent along with the browse request (see ProductService).
      waitElements: []
    }
    this.selectors.waitElements.push(this.selectors.main_image)
    this.selectors.waitElements.push(this.selectors.info_box)
  }

  /**
   * Extract the product data from a loaded document.
   *
   * @param {Function} $ cheerio instance bound to the product page
   * @param {Function} cb receives the extracted object: one key per
   *   attribute row plus `title`, `price`, `main_img` (array of image
   *   sources or null) and `description` (HTML wrapped in a <div>)
   */
  extract($, cb) {
    // Strips hyphens, tabs and line breaks, removes pairs of whitespace
    // characters and trims; non-string values are returned untouched.
    const cleanup = (txt) => {
      return typeof txt == 'string' ? txt.replace(/[\-\t\n\r]+/g, '').replace(/[\s]{2}/g, '').trim() : txt
    }

    let info = {}
    // `each` rather than `map`: the loop only fills `info`, it builds no result.
    $(this.selectors.info_box).each((k, el) => {
      const infoData = $(el).find(this.selectors.info_data).map((i, v) => {
        return cleanup($(v).text())
      }).get()
      // Keep the first occurrence of every value.
      const uniqueData = infoData.filter((item, index) => infoData.indexOf(item) === index)
      info[$(el).find(this.selectors.info_title).text()] = cleanup(uniqueData.join(' | '))
    })

    const priceEl = $(this.selectors.price)
    const cents = priceEl.data('cents')

    // Attribute rows are assigned last, so a row named like a base field wins.
    info = Object.assign({
      title: cleanup($(this.selectors.title).text()),
      price: cleanup(priceEl.text()).replace(',', '.') + (cents ? cleanup(cents) : ''),
      main_img: (() => {
        let images = $(this.selectors.main_image)
        if (images.length) {
          images = images.map((k, v) => {
            return $(v).find('img').attr('src')
          }).get()
        } else {
          // No thumbnail gallery: fall back to the single large image.
          images = []
          const img = $('.rd-product-image-gallery__slide-large-image')
          if (img.length) images = [img.attr('src')]
        }
        return images.length ? images : null
      })()
    }, info)

    const description = $(this.selectors.description)
    description.find('*').each((k, elem) => this.removeAllAttributes($, elem))
    // html() yields null when the description block is absent; emit an
    // empty wrapper instead of the literal text "null".
    info.description = '<div>' + (description.html() || '') + '</div>'
    cb(info)
  }

  /**
   * Remove every attribute from a parsed element.
   *
   * Only `node.attribs` is consulted: depending on the parser version a node
   * may also expose a (always truthy) `attributes` list, which must not be
   * used to decide whether there is anything to strip.
   *
   * @param {Function} $ cheerio instance owning the node
   * @param {Object} node parsed DOM node
   * @returns {Object} the same node
   */
  removeAllAttributes($, node) {
    Object.keys(node.attribs || {}).forEach((name) => $(node).removeAttr(name))
    return node
  }
}
// A single shared instance is exported, so every importer sees the same
// `selectors` object.
module.exports = new Real

53
index.js Normal file
View File

@@ -0,0 +1,53 @@
const express = require("express")
, expressLogging = require("express-logging")
, logger = require("logops")
, HttpStatus = require('http-status-codes')
, port = 32102
, productController = require('./src/controller/ProductController')
, controllers = {};
// SECURITY NOTE(review): this turns off TLS certificate verification for
// every outgoing HTTPS connection made by this process. Confirm it is really
// required (e.g. self-signed upstream certificates) before shipping.
// Environment values are strings, so the value is assigned as '0' explicitly.
process.env["NODE_TLS_REJECT_UNAUTHORIZED"] = '0';
/**
 * Express web server starting
 *
 */
// Context fields attached by logops to each log entry.
logger.getContext = function getContext() {
  return {
    app: 'Extracter'
  };
}
const app = express();
app.use(express.json());
app.use(expressLogging(logger));
app.use((req, res, next) => {
  // Merge into a fresh object: logging must not modify req.body.
  logger.debug(Object.assign({}, req.body, req.query, req.params), 'Request params')
  next()
})
app.listen(port, () => {
  logger.info('Startup');
});
/**
 * Request handlers
 *
 * Every request is routed by its path: /<service>/<what>/<args...>.
 * `service` selects the controller method that receives the request, `what`
 * and `args` are handed on through `req.params`.
 */
app.all('*', (req, res, next) => {
  // req.path carries no query string, so "/product?uri=..." still resolves
  // to the service "product".
  const urlSegments = req.path.split('/');
  req.params.service = urlSegments[1]
  req.params.what = urlSegments.length > 2 ? urlSegments[2] : ''
  req.params.args = urlSegments.length > 3 ? urlSegments.slice(3) : []

  const service = req.params.service
  // Reject unknown services up front: answering 404 here avoids a TypeError
  // further down and keeps arbitrary URLs out of the controller cache.
  if (typeof productController[service] !== 'function') {
    res.status(HttpStatus.NOT_FOUND).json({
      code: HttpStatus.NOT_FOUND,
      message: HttpStatus.getStatusText(HttpStatus.NOT_FOUND)
    })
    return
  }

  if (!Object.prototype.hasOwnProperty.call(controllers, service)) {
    const controller = service.charAt(0).toUpperCase() + service.slice(1) + 'Controller';
    // Only one controller exists so far; it serves every known service.
    controllers[service] = productController
    logger.info('%s has been loaded', controller)
  }

  controllers[service][service](req, result => {
    let body = { code: HttpStatus.OK, message: HttpStatus.getStatusText(HttpStatus.OK), data: result }
    // A result that carries its own `code` is already a complete response body.
    if (result && result.code) body = result
    res.json(body)
  })
})

22
package.json Normal file
View File

@@ -0,0 +1,22 @@
{
"name": "spd-app-spider",
"version": "1.0.0",
"description": "IBP Corp Tracking service",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "node index.js"
},
"author": "",
"license": "ISC",
"dependencies": {
"@k3rn31p4nic/google-translate-api": "^1.1.0",
"cheerio": "^1.0.0-rc.3",
"command-line-args": "^5.1.1",
"express": "^4.17.1",
"express-logging": "^1.1.1",
"http-status-codes": "^1.4.0",
"logops": "^2.1.1",
"node-fetch": "^2.6.0"
}
}

View File

@@ -0,0 +1,53 @@
const fetch = require("node-fetch")
, logger = require("logops")
, TokenExpiredException = require('../exception/TokenExpiredException')
, config = require('../../config');
/**
 * Small OAuth client-credentials helper for the REST gateway: obtains a
 * bearer token, builds request headers and retries a request once after
 * re-authenticating when the gateway reports an expired token (406).
 */
class RestClientComp {
  constructor() {
    this.token = null
    this.gateway = 'http://' + config.quix24 + '/api/v1/'
    // SECURITY NOTE(review): credentials are hard-coded in source; consider
    // supplying them through configuration instead.
    this.clientId = 'F8Nbz9gGUvLgdn8T'
    this.clientSecret = 'Fesmx7hHCCMx7Cby7FhV2fAtf2NYEFzP5fCKuwEEdpf8hpnsnnQn9qYhNHt6SsDD'
  }

  /**
   * Request a new access token and remember it on the instance.
   * @returns {Promise<string>} the access token returned by the gateway
   */
  auth() {
    const url = `${this.gateway}oauth/token?client_id=${this.clientId}&grant_type=client_credentials&client_secret=${this.clientSecret}`
    return fetch(url, { method: 'GET', headers: { 'content-type': 'application/json' } })
      .then(res => res.json())
      .then(res => {
        logger.debug(res, 'Token from %s', this.gateway)
        this.token = res.access_token
        return res.access_token
      })
  }

  /**
   * Build the request headers carrying the current token.
   * @param {Object} [additionalheaders] extra headers; they override the defaults
   * @returns {Object} header map
   */
  headers(additionalheaders) {
    const headers = {
      "Authorization": "Bearer " + this.token,
      "Content-Type": "application/json"
    }
    return additionalheaders ? Object.assign(headers, additionalheaders) : headers
  }

  /**
   * Pass a response body through, raising when it reports an expired token.
   * @param {*} res response body
   * @returns {*} the unchanged body
   * @throws {TokenExpiredException} when `res.code` is 406
   */
  response(res) {
    // Loose comparison on purpose: the code may arrive as number or string.
    // The guard keeps null/undefined bodies from raising a TypeError.
    if (res && res.code == 406) {
      throw new TokenExpiredException("Token expired " + this.gateway)
    }
    return res;
  }

  /**
   * Run a request, authenticating first when no token is held and retrying
   * once with a fresh token when it fails with status 406.
   *
   * @param {Function} fetchable function starting the request and returning a promise
   * @returns {Promise<*>} whatever `fetchable` resolves to
   * @throws the original error for every failure other than an expired token
   */
  async request(fetchable) {
    if (!this.token) await this.auth()
    try {
      return await fetchable()
    } catch (error) {
      logger.error({ message: error.message }, 'Fetchable failed with error')
      if (error.status == 406) {
        logger.info('Requesting token %s', this.gateway)
        await this.auth()
        return fetchable()
      }
      // Propagate instead of resolving with undefined, so callers see the
      // real failure rather than an empty result.
      throw error
    }
  }
}
// The class itself is exported; each instance keeps its own token, gateway
// and client credentials.
module.exports = RestClientComp;

View File

@@ -0,0 +1,24 @@
const fetch = require("node-fetch")
, logger = require("logops")
, productService = require('../service/ProductService');
/**
 * Controller for the "product" service: maps an incoming request onto a
 * handler method of this class and reports the outcome through a callback.
 */
class ProductController {
  /**
   * Derive the handler name from the HTTP method and `params.what`:
   * POST + "some_thing" -> "postSomeThing", POST + "" -> "post".
   *
   * @param {Object} req request carrying `method` and `params.what`
   * @returns {string} handler method name
   */
  getMethodName(req) {
    const what = req.params.what || ''
    return req.method.toLowerCase()
      + what.split('_').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join('')
  }

  /**
   * Entry point of the service: dispatch the request to its handler.
   *
   * @param {Object} req request with `body`, `query` and `params`
   * @param {Function} callback receives the handler result, or an object
   *   `{ code, message }` when the request cannot be served
   * @returns {*} whatever the handler returns (a promise for async handlers)
   */
  product(req, callback) {
    const fn = this.getMethodName(req);
    // Merge into a fresh object so the request itself is left untouched;
    // later sources win: params over query over body.
    const params = Object.assign({}, req.body, req.query, req.params)
    if (typeof this[fn] !== 'function') {
      // No handler for this method/action pair: answer instead of throwing.
      logger.warn('No method ProductService.%s', fn)
      callback({ code: 404, message: 'Not Found' })
      return
    }
    logger.info('Invoking method ProductService.%s', fn)
    return this[fn](params, callback)
  }

  /**
   * POST handler: load and extract the product found at `params.uri`.
   *
   * @param {Object} params merged request parameters (`uri` is read)
   * @param {Function} callback receives the extracted product, or
   *   `{ code: 500, message }` when the lookup fails
   */
  async post(params, callback) {
    let answered = false
    const reply = (result) => {
      answered = true
      callback(result)
    }
    try {
      await productService.get(params.uri, reply)
    } catch (error) {
      logger.error({ message: error.message }, 'ProductService.get failed')
      // Never answer twice: the failure may have happened after the reply.
      if (!answered) callback({ code: 500, message: error.message })
    }
  }
}
// A single shared controller instance is exported.
module.exports = new ProductController;

View File

@@ -0,0 +1,16 @@
/**
 * Error signalling that the gateway rejected the current access token.
 * Instances carry `status` 406 next to the usual `name`, `message` and `stack`.
 */
class TokenExpiredException extends Error {
  /**
   * @param {string} message human readable description
   */
  constructor (message) {
    super(message)
    const errorClass = this.constructor
    // Report the concrete class name instead of the generic "Error".
    this.name = errorClass.name
    // Start the stack trace at the caller rather than inside this constructor.
    Error.captureStackTrace(this, errorClass);
    // Status code associated with an expired token.
    this.status = 406
  }
}
// The error class itself is exported so callers can construct and throw it.
module.exports = TokenExpiredException

View File

@@ -0,0 +1,36 @@
const config = require("../../config")
, logger = require("logops")
, configWebsiteReal = require('../../config/website/Real')
, RestClient = require('../component/RestClientComp')
, cheerio = require('cheerio')
, fetch = require('node-fetch')
/**
 * Fetches a product page through the browse gateway and extracts the
 * product data from the returned HTML.
 */
class ProductService {
  constructor() {
    this.restClient = new RestClient
    // This service talks to the `gateway` host with its own credentials.
    // SECURITY NOTE(review): credentials are hard-coded in source; consider
    // supplying them through configuration instead.
    this.restClient.gateway = 'http://' + config.gateway + '/api/v1/'
    this.restClient.clientId = '7tmK4rF8As8CtRw5'
    this.restClient.clientSecret = 'cgzLcVFku5zEvXkCKNZsAbxXQENFqPDx2kfjnMGd3m9BczehmFt2pw9r9MdASSFt'
  }

  /**
   * Load `uri` through the gateway and extract the product from the page.
   *
   * @param {string} uri address of the product page
   * @param {Function} callback receives the extracted product, or
   *   `{ code: 500, message }` when loading or extraction fails
   * @returns {Promise<void>} settles once the callback has been served
   */
  get(uri, callback) {
    let answered = false
    const reply = (result) => {
      answered = true
      callback(result)
    }
    return this.restClient.request(() => fetch(this.restClient.gateway + 'browse', {
      method: 'POST',
      headers: this.restClient.headers(),
      body: JSON.stringify({
        uri: uri, waitElements: configWebsiteReal.selectors.waitElements
      })
    }).then(res => res.text()).then(res => this.restClient.response(res)))
      .then(html => {
        // Guard against a request that settled without a page body.
        if (typeof html !== 'string') throw new Error('Empty response from gateway')
        this.extract(html, reply)
      })
      .catch(error => {
        logger.error({ message: error.message }, 'Product extraction failed')
        // Always answer the caller exactly once, even on failure.
        if (!answered) callback({ code: 500, message: error.message })
      })
  }

  /**
   * Parse the HTML and hand the document to the site specific extractor.
   *
   * @param {string} html page markup
   * @param {Function} callback receives the extracted product
   */
  extract(html, callback) {
    const $ = cheerio.load(html);
    configWebsiteReal.extract($, callback)
  }
}
// One shared service instance (and therefore one REST client) is created
// when the module is first loaded and exported to all importers.
const productService = new ProductService
module.exports = productService