
Commit 7984c88

Merge pull request #221 from ligangty/re-checksum
Add new refresh checksum cmd
2 parents 4c17aeb + b6c3a23 commit 7984c88

8 files changed  +445 -32 lines changed

charon/cmd/__init__.py

Lines changed: 5 additions & 2 deletions
@@ -17,7 +17,7 @@
 from charon.cmd.cmd_upload import upload
 from charon.cmd.cmd_delete import delete
 from charon.cmd.cmd_index import index
-from charon.cmd.cmd_checksum import checksum_validate
+from charon.cmd.cmd_checksum import init_checksum, checksum
 from charon.cmd.cmd_cache import init_cf, cf


@@ -33,8 +33,11 @@ def cli():
 cli.add_command(upload)
 cli.add_command(delete)
 cli.add_command(index)
-cli.add_command(checksum_validate)

 # init cf cmmand
 init_cf()
 cli.add_command(cf)
+
+# init checksum command
+init_checksum()
+cli.add_command(checksum)
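
For context, a minimal, self-contained sketch of the click group pattern this file switches to (not Charon's actual code; all names below are illustrative): subcommands are attached to a group by an init function at startup, and the group itself is then registered on the root CLI.

    import click

    @click.group()
    def checksum():
        """Checksum-related commands."""

    @click.command()
    def validate():
        """Validate checksum files."""
        click.echo("validating...")

    @click.command()
    def refresh():
        """Refresh checksum files."""
        click.echo("refreshing...")

    def init_checksum():
        # Attach the subcommands to the group once, at startup.
        checksum.add_command(validate)
        checksum.add_command(refresh)

    @click.group()
    def cli():
        """Root CLI."""

    init_checksum()
    cli.add_command(checksum)

    if __name__ == "__main__":
        cli()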

charon/cmd/cmd_cache.py

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@
     "-f",
     "path_file",
     help="""
-    The file which contain the paths to be invalidated in CF. Pahts in this file follow the
+    The file which contain the paths to be invalidated in CF. Paths in this file follow the
     format of CF defining too, and each path should be in a single line.
     """
 )

charon/cmd/cmd_checksum.py

Lines changed: 134 additions & 17 deletions
@@ -13,12 +13,14 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-from typing import List
+from typing import List, Tuple

 from charon.config import get_config
-from charon.pkgs.checksum_http import handle_checksum_validation_http
+from charon.pkgs.checksum_http import (
+    handle_checksum_validation_http, refresh_checksum
+)
 from charon.cmd.internal import _decide_mode
-from click import command, option, argument
+from click import command, option, argument, group

 import traceback
 import logging
@@ -99,7 +101,7 @@
     required=True
 )
 @command()
-def checksum_validate(
+def validate(
     path: str,
     target: str,
     includes: List[str],
@@ -118,22 +120,11 @@ def checksum_validate(
     be recorded.
     """
     _decide_mode(
-        "checksum-{}".format(target), path.replace("/", "_"),
+        "checksum-validate-{}".format(target), path.replace("/", "_"),
         is_quiet=quiet, is_debug=debug
     )
     try:
-        conf = get_config()
-        if not conf:
-            sys.exit(1)
-
-        aws_bucket = ""
-        root_path = ""
-        t = conf.get_target(target)
-        if not t:
-            sys.exit(1)
-        for b in t:
-            aws_bucket = b.get('bucket')
-            prefix = b.get('prefix', '')
+        (aws_bucket, prefix) = _init_cmd(target)

         # NOTE: This is a liitle hacky, which constrain the configuration of
         # of target should define the bucket to contain "prod-maven"
@@ -153,3 +144,129 @@ def checksum_validate(
     except Exception:
         print(traceback.format_exc())
         sys.exit(2)
+
+
+@option(
+    "--debug",
+    "-D",
+    "debug",
+    help="Debug mode, will print all debug logs for problem tracking.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--quiet",
+    "-q",
+    "quiet",
+    help="Quiet mode, will shrink most of the logs except warning and errors.",
+    is_flag=True,
+    default=False
+)
+@option(
+    "--path",
+    "-p",
+    "paths",
+    help="""
+    The paths of artifact files to do checksum refreshing.
+    """,
+    multiple=True
+)
+@option(
+    "--path-file",
+    "-f",
+    "path_file",
+    help="""
+    The file which contain the paths of artifact files to do checksum refreshing.
+    Each path in this file should be in a single line.
+    """
+)
+@option(
+    "--target",
+    "-t",
+    "target",
+    help="""
+    The target to do the uploading, which will decide which s3 bucket
+    and what root path where all files will be uploaded to.
+    Can accept more than one target.
+    """,
+    required=True
+)
+@command()
+def refresh(
+    target: str,
+    paths: List[str],
+    path_file: str,
+    quiet: bool = False,
+    debug: bool = False
+):
+    """
+    Refresh the checksum of the specified path for the target maven repository.
+    It will calculate the checksum files of the specified artifact and see if
+    unmatched, then regenerate the checksum files based on the artifact.
+    Default checksum files include .md5, .sha1.
+    """
+    _decide_mode(
+        "checksum-refresh-{}".format(target), "",
+        is_quiet=quiet, is_debug=debug, use_log_file=False
+    )
+    if not paths and not path_file:
+        logger.error(
+            "No path specified, please specify at least one path "
+            "through --path or --path-file.")
+        sys.exit(1)
+
+    work_paths = []
+    if paths:
+        work_paths.extend(paths)
+
+    conf = get_config()
+    aws_profile = os.getenv("AWS_PROFILE") or conf.get_aws_profile()
+    if not aws_profile:
+        logger.error("No AWS profile specified!")
+        sys.exit(1)
+
+    if path_file:
+        with open(path_file, "r", encoding="utf-8") as f:
+            for line in f.readlines():
+                work_paths.append(str(line).strip())
+    try:
+        (aws_bucket, prefix) = _init_cmd(target)
+
+        # NOTE: This is a liitle hacky, which constrain the configuration of
+        # of target should define the bucket to contain "prod-maven"
+        # or "stage-maven" to decide that the bucket is for maven repo
+        # in our defined aws env for production or stage
+        if "prod-maven" not in aws_bucket and "stage-maven" not in aws_bucket:
+            logger.error("The target %s is not a maven repository.", target)
+            sys.exit(1)
+
+        refresh_checksum((aws_bucket, prefix), work_paths, aws_profile)
+    except Exception:
+        print(traceback.format_exc())
+        sys.exit(2)
+
+
+def _init_cmd(target: str) -> Tuple[str, str]:
+    conf = get_config()
+    if not conf:
+        sys.exit(1)
+    aws_bucket = ""
+    t = conf.get_target(target)
+    if not t:
+        sys.exit(1)
+    for b in t:
+        aws_bucket = b.get('bucket')
+        prefix = b.get('prefix', '')
+    return (aws_bucket, prefix)
+
+
+@group()
+def checksum():
+    """checksum commands are responsible to operate checksum files
+    of maven products operated by Charon
+    """
+
+
+def init_checksum():
+    checksum.add_command(validate)
+    checksum.add_command(refresh)
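
With this layout the validate and refresh commands are reached as subcommands of the new checksum group. A hedged usage sketch driving the nested command through click's test runner (the target name "ga" and the artifact path are placeholder values; `cli` is assumed importable from charon.cmd as shown in the __init__.py diff above):

    from click.testing import CliRunner
    from charon.cmd import cli

    runner = CliRunner()
    result = runner.invoke(cli, [
        "checksum", "refresh",
        "--target", "ga",  # placeholder target name
        "--path", "org/apache/commons/commons-lang3/3.12.0/commons-lang3-3.12.0.jar",
    ])
    print(result.exit_code)
    print(result.output)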

charon/pkgs/checksum_http.py

Lines changed: 85 additions & 4 deletions
@@ -13,7 +13,8 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-from charon.utils.files import digest
+from charon.utils.files import digest, HashType
+from charon.storage import S3Client
 from typing import Tuple, List, Dict
 from html.parser import HTMLParser
 import tempfile
@@ -36,9 +37,8 @@ def handle_checksum_validation_http(
     skips: List[str] = None
 ):
     """ Handle the checksum check for maven artifacts.
-    * target contains bucket name and prefix for the bucket, which will
-      be used to store artifacts with the prefix. See target definition
-      in Charon configuration for details.
+    * bucket contains store artifacts with the prefix. See target
+      definition in Charon configuration for details.
     * path is the root path where to start the validation in the bucket.
     * includes are the file suffixes which will decide the types of files
       to do the validation.
@@ -266,3 +266,84 @@ def _decide_root_url(bucket: str) -> str:
     if bucket.strip().startswith("stage-maven"):
         return "https://maven.stage.repository.redhat.com"
     return None
+
+
+def refresh_checksum(
+    target: Tuple[str, str],
+    paths: List[str],
+    aws_profile: str = None
+):
+    """Refresh checksum for files in a given bucket.
+    * bucket contains store artifacts with the prefix. See target
+      definition in Charon configuration for details.
+    * paths are the exact files whose checksum files will be
+      refreshed with.
+    """
+    bucket_name = target[0]
+    prefix = target[1]
+    s3_client = S3Client(aws_profile=aws_profile)
+    real_prefix = prefix if prefix.strip() != "/" else ""
+    filetype_filter = [".prodinfo", ".sha1", ".sha256", ".md5"]
+    for path in paths:
+        is_artifact = True
+        for filetype in filetype_filter:
+            if path.strip().endswith(filetype):
+                is_artifact = False
+                continue
+        if not is_artifact:
+            logger.info(
+                "%s is not an artifact file for maven products. Skipped.",
+                path
+            )
+            continue
+        s3_path = os.path.join(real_prefix, path)
+        checksums = {
+            ".md5": HashType.MD5,
+            ".sha1": HashType.SHA1,
+            ".sha256": HashType.SHA256,
+            ".sha512": HashType.SHA512
+        }
+        if s3_client.file_exists_in_bucket(bucket_name, s3_path):
+            temp_f = os.path.join(tempfile.gettempdir(), path)
+            folder = os.path.dirname(temp_f)
+            try:
+                if not os.path.exists(folder):
+                    os.makedirs(folder)
+                s3_client.download_file(bucket_name, s3_path, temp_f)
+                existed_checksum_types = []
+                for file_type in checksums:
+                    s3_checksum_path = s3_path + file_type
+                    if s3_client.file_exists_in_bucket(bucket_name, s3_checksum_path):
+                        existed_checksum_types.append(file_type)
+                if existed_checksum_types:
+                    for file_type in existed_checksum_types:
+                        checksum_path = path + file_type
+                        s3_checksum_path = s3_path + file_type
+                        hash_type = checksums[file_type]
+                        correct_checksum_c = digest(temp_f, hash_type)
+                        original_checksum_c = s3_client.read_file_content(
+                            bucket_name, s3_checksum_path
+                        )
+                        if correct_checksum_c == original_checksum_c:
+                            logger.info("Checksum %s matches, no need to refresh.", checksum_path)
+                        else:
+                            logger.info("Checksum %s does not match, refreshing...", checksum_path)
+                            s3_client.simple_upload_file(
+                                file_path=checksum_path,
+                                file_content=correct_checksum_c,
+                                target=(bucket_name, prefix),
+                                mime_type="text/plain",
+                                force=True
+                            )
+                else:
+                    logger.warning(
+                        "No valid checksum files exist for %s, Skipped."
+                        " Are you sure it is a valid maven artifact?",
+                        path
+                    )
            finally:
+                if folder and folder != tempfile.gettempdir() and os.path.exists(folder):
+                    shutil.rmtree(folder)
+                logger.info("Checksums are refreshed for artifact %s", path)
+        else:
+            logger.warning("File %s does not exist in bucket %s", s3_path, bucket_name)

charon/storage.py

Lines changed: 14 additions & 7 deletions
@@ -675,14 +675,15 @@ def simple_upload_file(
         self, file_path: str, file_content: str,
         target: Tuple[str, str],
         mime_type: str = None,
-        check_sum_sha1: str = None
+        check_sum_sha1: str = None,
+        force: bool = False
     ):
         """ Uploads file to s3 bucket, regardless of any extra
         information like product and version info.
-        * Warning: this will directly overwrite the files even if
-          it has lots of product info, so please be careful to use.
-          If you want to upload product artifact files, please use
-          upload_files
+        * Warning: If force is set True, it will directly overwrite
+          the files even if it has lots of product info, so please be
+          careful to use. If you want to upload product artifact files,
+          please use upload_files()
         """
         bucket = target[0]
         prefix = target[1]
@@ -705,7 +706,7 @@ def simple_upload_file(
         content_type = mime_type
         if not content_type:
             content_type = DEFAULT_MIME_TYPE
-        if not existed:
+        if not existed or force:
             f_meta = {}
             if check_sum_sha1 and check_sum_sha1.strip() != "":
                 f_meta[CHECKSUM_META_KEY] = check_sum_sha1
@@ -723,7 +724,9 @@ def simple_upload_file(
                 file_path, bucket, e
             )
         else:
-            raise FileExistsError("Error: file %s already exists, upload is forbiden.")
+            raise FileExistsError(
+                f"Error: file {path_key} already exists, upload is forbiden."
+            )

     def delete_manifest(self, product_key: str, target: str, manifest_bucket_name: str):
         if not manifest_bucket_name:
@@ -780,6 +783,10 @@ def read_file_content(self, bucket_name: str, key: str) -> str:
         file_object = bucket.Object(key)
         return str(file_object.get()['Body'].read(), 'utf-8')

+    def download_file(self, bucket_name: str, key: str, file_path: str):
+        bucket = self.__get_bucket(bucket_name)
+        bucket.download_file(key, file_path)
+
     def list_folder_content(self, bucket_name: str, folder: str) -> List[str]:
         """List the content in folder in an s3 bucket. Note it's not recursive,
         which means the content only contains the items in that folder, but
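
An illustrative combination of the two S3Client additions, with assumed bucket, prefix, key, and profile values: download_file() pulls an object to a local path, and simple_upload_file(..., force=True) overwrites an existing key instead of raising FileExistsError.

    from charon.storage import S3Client

    client = S3Client(aws_profile="my-aws-profile")  # assumed profile name

    # Download an artifact to a local path (assumed bucket/key/paths).
    client.download_file(
        "prod-maven-ga",
        "ga/org/foo/bar/1.0.0/bar-1.0.0.jar",
        "/tmp/bar-1.0.0.jar",
    )

    # Overwrite an existing checksum object; without force=True this call
    # would raise FileExistsError for a key that already exists.
    client.simple_upload_file(
        file_path="org/foo/bar/1.0.0/bar-1.0.0.jar.md5",
        file_content="0cc175b9c0f1b6a831c399e269772661",  # placeholder digest
        target=("prod-maven-ga", "ga"),
        mime_type="text/plain",
        force=True,
    )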
