sudo apt-get -y install openvpn
sudo touch /etc/openvpn/credentials
printf '%s\n' 'username' 'password' | sudo tee /etc/openvpn/credentials
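The credentials file stores the VPN username and password in plain text, so it is worth restricting its permissions; an optional hardening step on the same path created above:
sudo chmod 600 /etc/openvpn/credentials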
sudo sed -i 's/auth-user-pass/auth-user-pass \/etc\/openvpn\/credentials/g' /etc/openvpn/US-East.ovpn
sudo openvpn --config /etc/openvpn/US-East.ovpn
Reference:
How to Setup OpenVPN Command Line on Linux (Ubuntu)
S3 Select is focused on retrieving data from S3 using SQL:
S3 Select enables applications to retrieve only a subset of data from an object by using simple SQL expressions. By using S3 Select to retrieve only the data needed by your application, you can achieve drastic performance increases – in many cases you can get as much as a 400% improvement compared with classic S3 retrieval.
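As a quick illustration, an S3 Select query can also be issued from the AWS CLI; the bucket, object key, and column names below are hypothetical placeholders:
aws s3api select-object-content \
    --bucket my-bucket \
    --key data/sample.csv \
    --expression "SELECT s.col1 FROM S3Object s WHERE s.col2 = 'foo'" \
    --expression-type SQL \
    --input-serialization '{"CSV": {"FileHeaderInfo": "USE"}}' \
    --output-serialization '{"CSV": {}}' \
    output.csv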
reference: https://stackoverflow.com/a/29400789
SELECT SPID = er.session_id
,STATUS = ses.STATUS
,[Login] = ses.login_name
,Host = ses.host_name
,BlkBy = er.blocking_session_id
,DBName = DB_Name(er.database_id)
,CommandType = er.command
,ObjectName = OBJECT_NAME(st.objectid)
,CPUTime = er.cpu_time
,StartTime = er.start_time
,TimeElapsed = CAST(GETDATE() - er.start_time AS TIME)
,SQLStatement = st.text
FROM sys.dm_exec_requests er
OUTER APPLY sys.dm_exec_sql_text(er.sql_handle) st
LEFT JOIN sys.dm_exec_sessions ses
ON ses.session_id = er.session_id
LEFT JOIN sys.dm_exec_connections con
ON con.session_id = ses.session_id
WHERE st.text IS NOT NULL
reference: https://stackoverflow.com/a/54684796
WITH
CTE_Sysession (AgentStartDate)
AS
Create EFS on AWS web portal
Edit the security group of EFS to allow access from EC2 instances
Mount EFS on EC2
sudo mkdir /efs
sudo chmod 777 /efs
Install amazon-efs-utils for auto-remount
git clone https://github.com/aws/efs-utils
cd efs-utils/
./build-deb.sh
sudo apt-get -y install ./build/amazon-efs-utils*deb
Configure IAM role in EC2 (already done)
Edit /etc/fstab
fs-xxxxxxxx:/ /efs efs _netdev,tls,iam 0 0
Test mount
sudo mount -fav
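The share can also be mounted once by hand with the efs-utils mount helper (same file-system id and options as the fstab entry above) as a sanity check before relying on fstab:
sudo mount -t efs -o tls,iam fs-xxxxxxxx:/ /efs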
Add each Linux user to the other EC2 instance's user group to avoid read-only permission issues
sudo usermod -a -G ubuntu guangningyu
sudo usermod -a -G guangningyu ubuntu
Reference:
1. Mount the Amazon EFS File System on the EC2 Instance and Test
2. Mounting your Amazon EFS file system automatically
3. User and Group ID Permissions for Files and Directories Within a File System
test.csv
key,a,b,c
a,1,,-1
a,2,,
a,3,,4
test.py
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession \
    .builder \
    .appName("spark-app") \
    .getOrCreate()
spark.sparkContext.setLogLevel("WARN")

df = spark.read.csv("test.csv", header=True)
res = df.groupBy(["key"]).agg(*[
    F.max("a"),
    F.max("b"),
    F.max("c"),
    F.min("a"),
    F.min("b"),
    F.min("c"),
])
print(res.toPandas())
spark-submit test.py
key max(a) max(b) max(c) min(a) min(b) min(c)
0 a 3 None 4 1 None -1
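As the output shows, max/min skip null values: column c still yields 4 and -1 despite the missing entries, while column b, which is entirely null, returns None.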
brew update && brew install azure-cli
az login
brew tap azure/functions
brew install azure-functions-core-tools@2
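A quick way to confirm both installs succeeded (reported versions will vary):
az --version
func --version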
References:
Install Azure CLI on macOS
Azure/azure-functions-core-tools
Reference: Windows Server 2016 : Initial Settings : Add Local Users
Install dependencies
sudo apt-get update
sudo apt-get install automake autotools-dev fuse g++ git libcurl4-gnutls-dev libfuse-dev libssl-dev libxml2-dev make pkg-config
Install s3fs
git clone https://github.com/s3fs-fuse/s3fs-fuse.git
cd s3fs-fuse
./autogen.sh
./configure --prefix=/usr --with-openssl
make
sudo make install
which s3fs
Config credentials
echo "Your_accesskey:Your_secretkey" >> /etc/passwd-s3fs
sudo chmod 640 /etc/passwd-s3fs
Create mounting point
sudo mkdir /mys3bucket
sudo s3fs your_bucketname -o use_cache=/tmp -o allow_other -o uid=1001 -o mp_umask=002 -o multireq_max=5 /mys3bucket
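To verify the bucket is mounted (sizes shown depend on the bucket):
df -h /mys3bucket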
Config mount after reboot
Add the following command in /etc/rc.local:
/usr/bin/s3fs your_bucketname -o use_cache=/tmp -o allow_other -o uid=1001 -o mp_umask=002 -o multireq_max=5 /mys3bucket
Reference:
How to Mount S3 bucket on EC2 Linux Instance
# Install Nextcloud stack
sudo snap install nextcloud
# Create administrator account
sudo nextcloud.manual-install <admin_username> <admin_password>
# Configure trusted domains (only localhost by default)
sudo nextcloud.occ config:system:get trusted_domains
sudo nextcloud.occ config:system:set trusted_domains 1 --value=<dns-domain>
# Set 512M as PHP memory limit
sudo snap get nextcloud php.memory-limit # Should be 512M
sudo snap set nextcloud php.memory-limit=512M
# Set background jobs interval (e.g. checking for new emails, update RSS feeds, ...)
sudo snap set nextcloud nextcloud.cron-interval=10m # Default: 15m
sudo snap set nextcloud ports.http=81 ports.https=444
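# The port override can be read back the same way as the php.memory-limit check above
sudo snap get nextcloud ports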
Reference:
Nextcloud on AWS
Putting the snap behind a reverse proxy
$ export RELEASE=$(curl -s https://api.github.com/repos/kubeless/kubeless/releases/latest | grep tag_name | cut -d '"' -f 4)
$ kubectl create ns kubeless
$ kubectl create -f https://github.com/kubeless/kubeless/releases/download/$RELEASE/kubeless-$RELEASE.yaml
$ kubectl get pods -n kubeless
$ kubectl get deployment -n kubeless
$ kubectl get customresourcedefinition
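Save a minimal handler as test.py (this is the file referenced by --from-file in the deploy command below):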
def hello(event, context):
    print event
    return event['data']
$ kubeless function deploy hello --runtime python2.7 \
--from-file test.py \
--handler test.hello
$ kubectl get functions
$ kubeless function ls
$ kubeless function call hello --data 'Hello world!'
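If the call misbehaves, the function's output can also be inspected from the CLI, assuming this kubeless release ships the logs subcommand:
$ kubeless function logs hello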
create a file
echo This is a sample text file > sample.txt
delete a file
del file_name
move a file
move stats.doc c:\statistics
combine files
copy /b file1 + file2 file3
import pandas as pd
import pyodbc
import sqlalchemy
import urllib.parse


def get_sqlalchemy_engine(driver, server, uid, pwd, database):
    conn_str = 'DRIVER={};SERVER={};UID={};PWD={};DATABASE={}'.format(driver, server, uid, pwd, database)
    quoted = urllib.parse.quote_plus(conn_str)
    engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect={}'.format(quoted))
    return engine


if __name__ == '__main__':
    # create engine
    driver = 'ODBC Driver 17 for SQL Server'
    server = 'xxx'
    uid = 'xxx'
    pwd = 'xxx'
    database = 'xxx'
    engine = get_sqlalchemy_engine(driver, server, uid, pwd, database)
    # read excel
    file_path = 'xxx'
    df = pd.read_excel(file_path)
    # load into SQL Server
    schema_name = 'xxx'
    table_name = 'xxx'
    df.to_sql(table_name, schema=schema_name, con=engine, index=False, if_exists='replace')
import pyodbc

conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=test;DATABASE=test;UID=user;PWD=password')
cursor = conn.cursor()
for row in cursor.tables():
    print(row.table_name)
Create a network named "test"
docker network create test
Create two containers using the network
docker run --name c1 --network "test" --rm --entrypoint tail mongo -f
docker run --name c2 --network "test" --rm --entrypoint tail mongo -f
Enter one container and ping the other; it works:
docker exec -it c1 bash
apt-get update && apt-get install -y iputils-ping # install the ping command
root@79568c5ce391:/usr/src/app# ping c2
PING c2 (172.18.0.3) 56(84) bytes of data.
64 bytes from c2.test (172.18.0.3): icmp_seq=1 ttl=64 time=0.137 ms
64 bytes from c2.test (172.18.0.3): icmp_seq=2 ttl=64 time=0.221 ms
64 bytes from c2.test (172.18.0.3): icmp_seq=3 ttl=64 time=0.232 ms
...
Using the default network or the "bridge" network explicitly does not work:
docker run --name c1 --rm --entrypoint tail web_scraper:v1 -f
docker run --name c2 --rm --entrypoint tail web_scraper:v1 -f
docker run --name c1 --network "bridge" --rm --entrypoint tail web_scraper:v1 -f
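This is expected: automatic DNS resolution of container names is only provided on user-defined networks; on the default bridge network containers can reach each other only by IP address (or via the legacy --link option).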
$ docker run -p 127.0.0.1:80:8080/tcp ubuntu bash
This binds port 8080 of the container to TCP port 80 on 127.0.0.1 of the host machine. You can also specify udp and sctp ports.
$ docker run --expose 80 ubuntu bash
This exposes port 80 of the container without publishing the port to the host system’s interfaces.
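To check what a running container actually publishes (the container name here is hypothetical):
$ docker port mycontainer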
Set up the same locale on both the local laptop and the remote server:
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
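Running locale afterwards on both machines should show the exported values:
locale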
Use the lsblk command to list the block devices attached to the instance:
$ lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
xvda 202:0 0 30G 0 disk
`-xvda1 202:1 0 8G 0 part /
loop0 7:0 0 91M 1 loop /snap/core/6405
loop1 7:1 0 87.9M 1 loop /snap/core/5742
loop2 7:2 0 17.9M 1 loop /snap/amazon-ssm-agent/1068
loop3 7:3 0 16.5M 1 loop /snap/amazon-ssm-agent/784
loop4 7:4 0 18M 1 loop /snap/amazon-ssm-agent/930
Use the df -h command to report the existing disk space usage on the file system:
$ sudo df -h /dev/xvd*
Filesystem Size Used Avail Use% Mounted on
udev 488M 0 488M 0% /dev
/dev/xvda1 7.7G 7.4G 370M 96% /
Expand the modified partition using growpart
$ sudo growpart /dev/xvda 1
CHANGED: partition=1 start=2048 old: size=16775135 end=16777183 new: size=62912479,end=62914527
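After the partition has been grown, the filesystem itself still needs to be extended before df reports the new size; a minimal sketch assuming the root volume uses ext4 as on the default Ubuntu AMI (for XFS the equivalent is xfs_growfs):
$ sudo resize2fs /dev/xvda1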