#!/usr/bin/env python
import urllib2
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
def load_data(data_url='https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'):
    """Download the UCI housing data file and split it into features and targets.

    Every non-blank line of the resource is parsed as whitespace-separated
    floats. The first 13 values of a line form one feature row and the 14th
    value is the matching target.

    Args:
        data_url: Location of the whitespace-delimited data file. Defaults to
            the UCI housing URL that used to be hard-coded in the body.

    Returns:
        A tuple ``(X, Y)``: ``X`` is a list of 13-element lists of floats and
        ``Y`` is a list of floats, both in file order.

    Raises:
        ValueError: If a token on a line cannot be converted to float.
        IndexError: If a non-blank line holds fewer than 14 values.
    """
    # Local imports keep the function usable on both Python 2 (urllib2) and
    # Python 3 (urllib.request) without touching the file-level imports.
    from contextlib import closing
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    X = []
    Y = []
    # closing() guarantees the HTTP response is released even if parsing fails.
    with closing(urlopen(data_url)) as response:
        for line in response.readlines():
            # A list (not map()) so the row can be sliced on Python 3 as well.
            values = [float(token) for token in line.split()]
            if not values:
                # Blank or trailing lines carry no record; skip them instead of
                # failing on values[13].
                continue
            X.append(values[0:13])
            Y.append(values[13])
    return X, Y
def basic_model():
    """Build and compile the baseline regression network.

    The network is a Sequential stack of two Dense layers: 13 relu units fed
    by a 13-feature input, followed by a single-unit output layer created
    without an activation argument. Both layers use the 'normal' kernel
    initializer. The model is compiled with mean squared error loss and the
    adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    # Describe the layers first, then stack them in order.
    hidden_layer = Dense(13, input_dim=13, kernel_initializer='normal', activation='relu')
    output_layer = Dense(1, kernel_initializer='normal')

    network = Sequential()
    for layer in (hidden_layer, output_layer):
        network.add(layer)

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network
d
已逾契约书约定缴款日的延滞天数,贷放型产品自缴款截止日(通常为次一关账日)后第一天开始计算;信用卡比较特别,虽然缴款截止日约为关账日后20天,但逾期天数亦是由次一关账日后起算。
逾期1期称为M1,2期称为M2,3期称为M3……以此类推。信用卡缴款截止日与次一关账日之间虽然不计算逾期天数,但其bucket称为M0。
注意,因为每月天数不一定相同,所以各期的长短会有不同。
依bucket可分为前期(front end)、中期(middle range)、后期(hot core)、转呆账(write-off)。
stage的划分方式并无硬性规定,可依各银行的催收策略、转呆账政策与产品特性决定。以信用卡为例,一般将M1列为前期,M2-M3列为中期,M4以上列为后期,若已转列呆账者则列入转呆账。
即期指标 = 当期各bucket延滞金额 / 当期应收账款
即期指标是计算延滞率时常用的两种方法之一,其概念为分析当期应收账款的质量结构。一般公开信息所显示的延滞率,若无特别注明,皆是以coincidental的概念计算的。
递延指标 = 当期各bucket延滞金额 / 各bucket对应的历史月份应收账款
即期指标的分母一律是当期应收账款,不过其分子实际上是由之前的应收账款产生的;因此,为了回溯逾期起源,递延指标将分母改为相对应的之前月份的应收账款。
期末结算为信用卡特有的结算方式。因为信用卡客群最为庞大,作业处理相当耗时,许多银行会将其客户划分至不同账务周期(cycle),因此信用卡产品下通常有多个关账日。
银行必须就各个cycle客户分别管理,尤其是账务及催收单位皆以cycle为作业周期。
月底结算报表主要表达各月月底结算数据,适用于消费金融所有产品,尤其在跨产品并列分析时,为实现资料切点一致,多采用月底结算数据。
参考:《互联网金融时代:消费信贷评分建模与应用》
-- change date format: parse a 'yyyyMMdd' string and re-format it as 'yyyy-MM-dd'
from_unixtime(unix_timestamp('20150101', 'yyyyMMdd'), 'yyyy-MM-dd') -- will return '2015-01-01'
-- add n days
date_add('2015-11-01', 30) -- will return '2015-12-01'
-- calculate date difference (first argument minus second argument, in days)
datediff('2015-12-01', '2015-11-01') -- will return 30
-- number the rows inside each DISTRIBUTE BY group, ordered by the SORT BY columns (descending)
row_number() over (DISTRIBUTE BY... SORT BY... DESC)
-- collect statistics for a single partition
analyze table xxx.yyy partition(dt = '2015-12-11') compute statistics;
-- show the detailed metadata of that partition (the keyword is FORMATTED, with a double t)
describe formatted xxx.yyy partition (dt = '2015-12-11');
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity

# Two sample vectors; the second is the first one scaled by 100.
samples = [[1, 2, 3], [100, 200, 300]]

# Pairwise Euclidean distances between the rows of `samples`.
euclidean_distances(samples)
# return:
# array([[  0.        , 370.42408129],
#        [370.42408129,   0.        ]])

# Pairwise cosine similarities between the rows of `samples`.
cosine_similarity(samples)
# return:
# array([[1., 1.],
#        [1., 1.]])
# Import from the public scipy.stats namespace; scipy.stats.stats is a
# deprecated private module path.
from scipy.stats import pearsonr
pearsonr([1, 2, 3], [100, 200, 300])
# return (approximately): (1.0, 0.0)
# i.e. (Pearson's correlation coefficient, 2-tailed p-value), both floats
apt-get install python-pip
pip install shadowsocks
Create the config file /etc/shadowsocks.json:
{
"server":"your_ip_address",
"server_port":8388,
"local_address": "127.0.0.1",
"local_port":1080,
"password":"your_password",
"timeout":300,
"method":"aes-256-cfb",
"fast_open": false
}
You can set multiple ports in the config file:
{
    "server": "your_ip_address",
    "local_address": "127.0.0.1",
    "local_port": 1080,
    "port_password": {
        "8381": "password_1",
        "8388": "password_2"
    },
    "timeout": 300,
    "method": "aes-256-cfb"
}
ssserver -c /etc/shadowsocks.json
# run in the background (daemon mode)
ssserver -c /etc/shadowsocks.json -d start
ssserver -c /etc/shadowsocks.json -d stop
Edit /etc/rc.local so that the server starts at boot:
/usr/local/bin/ssserver -c /etc/shadowsocks.json -d start
exit 0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
from numpy import mat, ones, shape, exp, array, arange
import matplotlib.pyplot as plt
def createDataSet(data_url='https://raw.github.com/pbharrin/machinelearninginaction/master/Ch05/testSet.txt'):
    """Download the sample data set and return feature rows and labels.

    Every non-blank line of the resource is split on whitespace. The first two
    columns are parsed as floats and prefixed with a constant 1.0 (the x0
    term), and the third column is parsed as an integer label.

    Args:
        data_url: Location of the whitespace-delimited data file. Defaults to
            the testSet.txt URL that used to be hard-coded in the body.

    Returns:
        A tuple ``(features, labels)``: ``features`` is a list of
        ``[1.0, x1, x2]`` lists and ``labels`` is a list of ints, both in
        file order.

    Raises:
        ValueError: If a column cannot be converted to float/int.
        IndexError: If a non-blank line holds fewer than three columns.
    """
    # Local imports keep the function usable on both Python 2 (urllib2) and
    # Python 3 (urllib.request) without touching the file-level imports.
    from contextlib import closing
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen

    features = []
    labels = []
    # closing() guarantees the HTTP response is released even if parsing fails.
    with closing(urlopen(data_url)) as response:
        for line in response.readlines():
            columns = line.strip().split()
            if not columns:
                # Blank or trailing lines carry no sample; skip them instead
                # of failing on columns[0].
                continue
            features.append([1.0, float(columns[0]), float(columns[1])])  # set x0 to 1.0
            labels.append(int(columns[2]))
    return features, labels
def sigmoid(value):
    """Return the logistic function ``1 / (1 + exp(-value))`` of ``value``.

    ``value`` is passed straight to ``exp``, so it may be a scalar or any
    array-like that ``exp`` accepts; the result has the matching shape.
    """
    decay = exp(-value)
    denominator = 1 + decay
    return 1.0 / denominator
def gradAscent(features, labels, alpha=0.001, iterations=500):
'''
梯度上升算法:
- 批处理算法:每次更新回归系数时都需要遍历整个数据集
'''
featureMatrix = mat(features)
labelMatrix = mat(labels).transpose()
m, n = shape(featureMatrix)
weights = ones((n, 1))
for k in range(iterations):
h = sigmoid(featureMatrix*weights)
error = (labelMatrix - h)
weig
# Print the absolute value of the first field of every line of test.csv,
# computed as sqrt($1*$1) and formatted as an integer with %d.
# NOTE(review): the format string has no "\n", so the results are written back
# to back, and no -F option is given, so fields are split on whitespace rather
# than on commas -- confirm both are intended for a .csv input.
awk '{printf("%d",sqrt($1*$1))}' test.csv
# Define a max(a,b) helper inside awk and print the larger of the two input
# fields (prints 105 for the input "4 105").
echo "4 105" | awk 'function max(a,b){return a>b?a:b}{print max($1, $2)}'