Commonly used scripts
Contents
- remove_all_pyc
- find_all_links
- rename_with_slice
- load_json_without_dupes
- execution_time
- benchmark_permissions_loading_django
- basic_email_web_crawler
- basic_link_web_crawler
- find_files_recursively
- optimize_images_with_wand
- csv_split
- random_name_generator
- html_to_markdown
- check_my_environment
- jinja_quick_load
- rewrite_git_history
- zipper
- Search all files under the current directory for a given string
- Linux: mount a Samba share to sync files
- Find the process using a given port
- Ubuntu: find the ports used by a process
- Log in over SSH with the password on the command line
- Non-interactive sudo
- Run commands on a remote host over SSH
remove_all_pyc
find . -name "*.pyc" -exec git rm -f {} \;
find_all_links
import requests
import re
# get url
url = input('Enter a URL (include `http://`): ')
# connect to the url
website = requests.get(url)
# read html
html = website.text
# use re.findall to grab all the links
links = re.findall('"((http|ftp)s?://.*?)"', html)
# output links
for link in links:
    print(link[0])
rename_with_slice
import os
import glob
os.chdir("/Users/mikeherman/repos/bugs/se-platform/se/core/permissions")
for file in glob.glob("*.json"):
    file_name = os.path.splitext(file)[0]
    extension = os.path.splitext(file)[1]
    new_file_name = file_name[:-6] + extension
    try:
        os.rename(file, new_file_name)
    except OSError as e:
        print(e)
    else:
        print("Renamed {} to {}".format(file, new_file_name))
load_json_without_dupes
def dict_raise_on_duplicates(ordered_pairs):
    """Reject duplicate keys."""
    my_dict = dict()
    for key, values in ordered_pairs:
        if key in my_dict:
            raise ValueError("Duplicate key: {}".format(key))
        else:
            my_dict[key] = values
    return my_dict
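A minimal usage sketch (the JSON string below is just an illustrative example, not part of the original script): pass the function as `object_pairs_hook` to `json.loads`, and duplicate keys raise an error instead of silently overwriting each other.
import json

raw = '{"name": "a", "name": "b"}'  # hypothetical input with a duplicate key

try:
    data = json.loads(raw, object_pairs_hook=dict_raise_on_duplicates)
except ValueError as err:
    print(err)  # Duplicate key: name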
execution_time
"""
ExecutionTime
This class is used for timing execution of code.
For example:
    timer = ExecutionTime()
    print('Hello world!')
    print('Finished in {} seconds.'.format(timer.duration()))
"""
import time
import random
class ExecutionTime:
    def __init__(self):
        self.start_time = time.time()

    def duration(self):
        return time.time() - self.start_time
# ---- run code ---- #
timer = ExecutionTime()
sample_list = list()
my_list = [random.randint(1, 888898) for num in
           range(1, 1000000) if num % 2 == 0]
print('Finished in {} seconds.'.format(timer.duration()))
benchmark_permissions_loading_django
import os
import time
import numpy
# temp file for benchmarking
def timeit(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        all_times.append(te - ts)
        print(all_times)
        print(numpy.mean(all_times))
        return result
    return timed


def create_new_db():
    os.system("mysqladmin -u root drop DATABASE_NAME -f")
    os.system("mysqladmin -u root create DATABASE_NAME -f")
    os.system("./manage.py syncdb")
    os.system("./manage.py migrate")


@timeit
def load_new_perms():
    os.system("./manage.py LOAD_PERMS_COMMAND")


if __name__ == "__main__":
    n = 0
    all_times = list()
    while n < 10:
        create_new_db()
        load_new_perms()
        n += 1
basic_email_web_crawler
import requests
import re
# get url
url = input('Enter a URL (include `http://`): ')
# connect to the url
website = requests.get(url)
# read html
html = website.text
# use re.findall to grab all the links
links = re.findall('"((http|ftp)s?://.*?)"', html)
emails = re.findall(r'([\w\.,]+@[\w\.,]+\.\w+)', html)

# print the number of links in the list
print("\nFound {} links".format(len(links)))

for email in emails:
    print(email)
basic_link_web_crawler
import requests
import re
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin

# regex
link_re = re.compile(r'href="(.*?)"')


def crawl(url):
    req = requests.get(url)

    # Check if successful
    if req.status_code != 200:
        return []

    # Find links
    links = link_re.findall(req.text)

    print("\nFound {} links".format(len(links)))

    # Print each link as an absolute URL
    for link in links:
        link = urljoin(url, link)
        print(link)


if __name__ == '__main__':
    crawl('http://www.realpython.com')
find_files_recursively
import fnmatch
import os
# constants
PATH = './'
PATTERN = '*.md'
def get_file_names(filepath, pattern):
    matches = []
    if os.path.exists(filepath):
        for root, dirnames, filenames in os.walk(filepath):
            for filename in fnmatch.filter(filenames, pattern):
                # matches.append(os.path.join(root, filename))  # full path
                matches.append(os.path.join(filename))  # just file name
        if matches:
            print("Found {} files:".format(len(matches)))
            output_files(matches)
        else:
            print("No files found.")
    else:
        print("Sorry that path does not exist. Try again.")


def output_files(list_of_files):
    for filename in list_of_files:
        print(filename)


if __name__ == '__main__':
    get_file_names(PATH, PATTERN)
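On Python 3.5+ the same recursive search can be written with pathlib's rglob; a minimal sketch (not part of the original script), reusing the same PATH and PATTERN constants:
from pathlib import Path

# recursively match PATTERN under PATH and print just the file names
for match in Path(PATH).rglob(PATTERN):
    print(match.name)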
optimize_images_with_wand
import fnmatch
import os
# pip install Wand
from wand.image import Image
# pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz
from hurry.filesize import size
# constants
PATH = '/../../../..'
PATTERN = '*.jpg'
def get_image_file_names(filepath, pattern):
    matches = []
    if os.path.exists(filepath):
        for root, dirnames, filenames in os.walk(filepath):
            for filename in fnmatch.filter(filenames, pattern):
                matches.append(os.path.join(root, filename))  # full path
        if matches:
            print("Found {} files, with a total file size of {}.".format(
                len(matches), get_total_size(matches)))
            return matches
        else:
            print("No files found.")
    else:
        print("Sorry that path does not exist. Try again.")


def get_total_size(list_of_image_names):
    total_size = 0
    for image_name in list_of_image_names:
        total_size += os.path.getsize(image_name)
    return size(total_size)


def resize_images(list_of_image_names):
    print("Optimizing ... ")
    for index, image_name in enumerate(list_of_image_names):
        with open(image_name, 'rb') as f:  # read the image as binary data
            image_binary = f.read()
        with Image(blob=image_binary) as img:
            if img.height >= 600:
                img.transform(resize='x600')
                img.save(filename=image_name)
    print("Optimization complete.")


if __name__ == '__main__':
    all_images = get_image_file_names(PATH, PATTERN)
    resize_images(all_images)
    get_image_file_names(PATH, PATTERN)
csv_split
import sys
import os
import csv
import argparse
"""
Splits a CSV file into multiple files based on command line arguments.
Arguments:
`-h`: help file of usage of the script
`-i`: input file name
`-o`: output file name
`-r`: row limit to split
Default settings:
`output_path` is the current directory
headers are displayed on each split file
the default delimeter is a comma
Example usage:
# split csv by every 100 rows
>> python csv_split.py -i input.csv -o output -r 10
"""
def get_arguments():
    """Grab user supplied arguments using the argparse library."""
    # Use argparse to get command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_file", required=True,
                        help="csv input file (with extension)", type=str)
    parser.add_argument("-o", "--output_file", required=True,
                        help="csv output file (without extension)", type=str)
    parser.add_argument("-r", "--row_limit", required=True,
                        help="row limit to split csv at", type=int)
    args = parser.parse_args()

    # Check if the input_file exists
    is_valid_file(parser, args.input_file)

    # Check if the input_file is valid
    is_valid_csv(parser, args.input_file, args.row_limit)

    return args.input_file, args.output_file, args.row_limit


def is_valid_file(parser, file_name):
    """Ensure that the input_file exists."""
    if not os.path.exists(file_name):
        parser.error("The file '{}' does not exist!".format(file_name))
        sys.exit(1)


def is_valid_csv(parser, file_name, row_limit):
    """
    Ensure that the # of rows in the input_file
    is greater than the row_limit.
    """
    row_count = 0
    for row in csv.reader(open(file_name)):
        row_count += 1
    # Note: You could also use a generator expression
    # and the sum() function to count the rows:
    # row_count = sum(1 for row in csv.reader(open(file_name)))
    if row_limit > row_count:
        parser.error(
            "The row_limit of '{}' is greater than the number of rows in '{}'!"
            .format(row_limit, file_name)
        )
        sys.exit(1)


def parse_file(arguments):
    """
    Splits the CSV into multiple files or chunks based on the row_limit.
    Then create new CSV files.
    """
    input_file = arguments[0]
    output_file = arguments[1]
    row_limit = arguments[2]
    output_path = '.'  # Current directory

    # Read CSV, split into list of lists
    with open(input_file, 'r') as input_csv:
        datareader = csv.reader(input_csv)
        all_rows = []
        for row in datareader:
            all_rows.append(row)

        # Remove header
        header = all_rows.pop(0)

        # Split list of lists into chunks
        current_chunk = 1
        for i in range(0, len(all_rows), row_limit):  # Loop through list
            chunk = all_rows[i:i + row_limit]  # Create single chunk

            current_output = os.path.join(  # Create new output file
                output_path,
                "{}-{}.csv".format(output_file, current_chunk)
            )

            # Add header
            chunk.insert(0, header)

            # Write chunk to output file
            with open(current_output, 'w') as output_csv:
                writer = csv.writer(output_csv)
                writer.writerows(chunk)

            # Output info
            print("")
            print("Chunk # {}:".format(current_chunk))
            print("Filepath: {}".format(current_output))
            print("# of rows: {}".format(len(chunk)))

            # Create new chunk
            current_chunk += 1


if __name__ == "__main__":
    arguments = get_arguments()
    parse_file(arguments)
random_name_generator
from random import randint
def random_name_generator(first, second, x):
    """
    Generates random names.

    Arguments:
        - list of first names
        - list of last names
        - number of random names
    """
    names = []
    for i in range(0, int(x)):
        random_first = randint(0, len(first) - 1)
        random_last = randint(0, len(second) - 1)
        names.append("{0} {1}".format(
            first[random_first],
            second[random_last])
        )
    return set(names)
first_names = ["Drew", "Mike", "Landon", "Jeremy", "Tyler", "Tom", "Avery"]
last_names = ["Smith", "Jones", "Brighton", "Taylor"]
names = random_name_generator(first_names, last_names, 5)
print('\n'.join(names))
html_to_markdown
# Convert all html files in a single directory to markdown
#
# 1. Install pandoc
# 2. Run the script
FILES=*.html
for f in $FILES
do
  # extension="${f##*.}"
  filename="${f%.*}"
  echo "Converting $f to $filename.md"
  pandoc "$f" -t markdown -o "../mds/$filename.md"
  # uncomment this line to delete the source file.
  # rm "$f"
done
check_my_environment
"""
Pass in a config file based on your environment.
Example:
    import check_my_environment

    class Main:
        def __init__(self, configFile):
            pass

        def process(self):
            print("ok")

    if __name__ == "__main__":
        m = Main(check_my_environment.CONFIGFILE)
        m.process()
"""
import os
import sys

ENVIRONMENT = "development"
CONFIGFILE = None


def get_config_file():
    directory = os.path.dirname(__file__)
    return {
        "development": "{}/../config/development.cfg".format(directory),
        "staging": "{}/../config/staging.cfg".format(directory),
        "production": "{}/../config/production.cfg".format(directory)
    }.get(ENVIRONMENT, None)


CONFIGFILE = get_config_file()

if CONFIGFILE is None:
    sys.exit("Configuration error! Unknown environment set. "
             "Edit config.py and set appropriate environment")
print("Config file: {}".format(CONFIGFILE))

if not os.path.exists(CONFIGFILE):
    sys.exit("Configuration error! Config file does not exist")
print("Config ok ....")
jinja_quick_load
"""
Render a quick Jinja2 template.
Thanks Danny - http://pydanny.com/jinja2-quick-load-function.html
Example:
>>> from jinja_quick_load import render_from_template
>>> data = {
... "date": "June 12, 2014",
... "items": ["oranges", "bananas", "steak", "milk"]
... }
>>> render_from_template(".", "shopping_list.html", **data)
"""
from jinja2 import FileSystemLoader, Environment
def render_from_template(directory, template_name, **kwargs):
    loader = FileSystemLoader(directory)
    env = Environment(loader=loader)
    template = env.get_template(template_name)
    return template.render(**kwargs)
rewrite_git_history
I always forget how to back-date a commit, so here we go ...
This is dangerous and should be signed off by the omniscient, omnipotent Git deity itself. Rewriting history is evil, in other words.
$ git add <file_name>
$ export GIT_COMMITTER_DATE="Sun Jun 15 14:00 2014 +0100"
$ export GIT_AUTHOR_DATE="Sun Jun 15 14:00 2014 +0100"
$ git commit -m "so bad"
$ git push
GIT_COMMITTER_DATE and GIT_AUTHOR_DATE are environment variables that Git reads when it creates the commit.
zipper
import os
from datetime import datetime
from zipfile import ZipFile

# set file name and time of creation
today = datetime.now()
file_name = 'zipper_' + today.strftime('%Y.%m.%dh%H%M') + '.zip'
dir_name = 'tmp/'  # update path


def zipdir(path, zipf):
    # walk the directory tree and add every file to the archive
    for root, dirs, files in os.walk(path):
        for file in files:
            zipf.write(os.path.join(root, file))


if __name__ == '__main__':
    zipfile = ZipFile(file_name, 'w')
    zipdir(dir_name, zipfile)
    zipfile.close()
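A quick sanity check that could be appended to the script above (a sketch, not part of the original): list what ended up in the archive.
from zipfile import ZipFile

with ZipFile(file_name) as archive:  # file_name as defined above
    print(archive.namelist())        # every path written into the zip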
Search all files under the current directory for a given string
find . -name "*.py" | xargs cat | grep org.csv
Linux: mount a Samba share to sync files
# sudo apt-get install cifs-utils
sudo mount -t cifs //IP/share $(pwd)/share -o username=username
# /etc/fstab entry:
//192.168.3.145/username /home/username/dev/ cifs defaults,username=username,password=password,uid=uid,gid=gid
mount -t cifs //60.205.230.226/share $(pwd)/share -o username=xxxxxxxx,passwd=xxxxxxx
sudo mount -t cifs //192.168.0.103/Public /mnt/samba/ -o guest
mount -t smbfs -o codepage=cp936,username=USERNAME,password=PASSWORD //IP_ADDRESS/SHARE_NAME MOUNT_POINT
or
mount -t smbfs -o codepage=cp936,username=USERNAME,password=PASSWORD //HOSTNAME/SHARE_NAME MOUNT_POINT
If no username and password are set, this can be simplified to:
mount -t smbfs -o codepage=cp936 //IP_ADDRESS_OR_HOSTNAME/SHARE_NAME MOUNT_POINT
Find the process using a given port
lsof -Pnl +M -i4 | grep 8010
On Linux, to find the PID that is using a port:
Use the lsof command: lsof -i:PORT
Ubuntu: find the ports used by a process
sudo netstat -anp | grep PID
Log in over SSH with the password on the command line
sudo apt-get install sshpass
sshpass -p '12345678' ssh androidyue@10.0.5.10
Note that the password above must be wrapped in single quotes; double quotes will not do.
Non-interactive sudo
echo password | sudo -S ls
Run commands on a remote host over SSH
ssh centos@192.168.202.205 << AAA
ls
exit
AAA
Author: 百里求一
Source: http://www.cnblogs.com/bergus/
My Yuque: https://www.yuque.com/barry.bai
Copyright of this article is shared by the author and cnblogs. Reposting is welcome, but unless the author agrees otherwise this notice must be retained and a prominent link to the original must be given on the article page; otherwise the author reserves the right to pursue legal liability.